• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-tools/src/
/* GNU gettext - internationalization aids
   Copyright (C) 1995-1999, 2000-2007 Free Software Foundation, Inc.

   This file was written by Peter Miller <millerp@canb.auug.org.au>.
   Multibyte character handling by Bruno Haible <haible@clisp.cons.org>.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19
20
21#ifdef HAVE_CONFIG_H
22# include "config.h"
23#endif
24
25/* Specification.  */
26#include "po-lex.h"
27
28#include <errno.h>
29#include <limits.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <stdarg.h>
34
35#if HAVE_ICONV
36# include <iconv.h>
37#endif
38
39#include "c-ctype.h"
40#include "linebreak.h"
41#include "uniwidth.h"
42#include "gettext.h"
43#include "po-charset.h"
44#include "xalloc.h"
45#include "error.h"
46#include "error-progname.h"
47#include "xvasprintf.h"
48#include "po-error.h"
49#include "po-xerror.h"
50#include "pos.h"
51#include "message.h"
52#include "str-list.h"
53#include "po-gram-gen2.h"
54
55#define _(str) gettext(str)
56
57#if HAVE_ICONV
58# include "unistr.h"
59#endif
60
61#if HAVE_DECL_GETC_UNLOCKED
62# undef getc
63# define getc getc_unlocked
64#endif
65
66
67/* Current position within the PO file.  */
68lex_pos_ty gram_pos;
69int gram_pos_column;
70
71
/* Error handling during the parsing of a PO file.
   These functions can access gram_pos and gram_pos_column.  */
74
75/* VARARGS1 */
76void
77po_gram_error (const char *fmt, ...)
78{
79  va_list ap;
80  char *buffer;
81
82  va_start (ap, fmt);
83  if (vasprintf (&buffer, fmt, ap) < 0)
84    error (EXIT_FAILURE, 0, _("memory exhausted"));
85  va_end (ap);
86  po_xerror (PO_SEVERITY_ERROR, NULL, gram_pos.file_name, gram_pos.line_number,
87	     gram_pos_column + 1, false, buffer);
88  free (buffer);
89
90  if (error_message_count >= gram_max_allowed_errors)
91    po_error (EXIT_FAILURE, 0, _("too many errors, aborting"));
92}
93
94/* VARARGS2 */
95void
96po_gram_error_at_line (const lex_pos_ty *pp, const char *fmt, ...)
97{
98  va_list ap;
99  char *buffer;
100
101  va_start (ap, fmt);
102  if (vasprintf (&buffer, fmt, ap) < 0)
103    error (EXIT_FAILURE, 0, _("memory exhausted"));
104  va_end (ap);
105  po_xerror (PO_SEVERITY_ERROR, NULL, pp->file_name, pp->line_number,
106	     (size_t)(-1), false, buffer);
107  free (buffer);
108
109  if (error_message_count >= gram_max_allowed_errors)
110    po_error (EXIT_FAILURE, 0, _("too many errors, aborting"));
111}
112
113
/* The lowest level of PO file parsing converts bytes to multibyte characters.
   This is needed
   1. for C compatibility: ISO C 99 section 5.1.1.2 says that the first
      translation phase maps bytes to characters.
   2. to keep track of the current column, for the sake of precise error
      location. Emacs compile.el interprets the column in error messages
      by default as a screen column number, not as character number.
   3. to avoid skipping backslash-newline in the midst of a multibyte
      character. If XY is a multibyte character,  X \ newline Y  is invalid.
 */
124
/* Multibyte character data type.
   How its contents are to be interpreted depends on po_lex_charset and
   po_lex_iconv, which get set while the file is being parsed.  */

/* Capacity, in bytes, of the byte buffer of a single multibyte character.  */
#define MBCHAR_BUF_SIZE 24

struct mbchar
{
  /* Number of bytes of the current character.  It is > 0 for a real
     character; the value 0 is reserved for EOF.  */
  size_t bytes;
#if HAVE_ICONV
  /* True if uc holds a valid Unicode character.  */
  bool uc_valid;
  /* The current character as a Unicode code point; meaningful only when
     uc_valid is true.  */
  unsigned int uc;
#endif
  /* The bytes of the character, as they were read from the input.  */
  char buf[MBCHAR_BUF_SIZE];
};

/* An array type of one element, so that multibyte characters are passed
   by reference automatically.  */
typedef struct mbchar mbchar_t[1];
144
/* Copy N bytes from SRC to DST, front to back, one byte at a time.
   Meant for very short copies (typically N <= 1), where a call to memcpy
   would not pay off.  Does nothing when N is 0.  */
static inline void
memcpy_small (void *dst, const void *src, size_t n)
{
  char *out = (char *) dst;
  const char *in = (const char *) src;

  while (n > 0)
    {
      *out++ = *in++;
      n--;
    }
}
159
160/* EOF (not a real character) is represented with bytes = 0 and
161   uc_valid = false.  */
162static inline bool
163mb_iseof (const mbchar_t mbc)
164{
165  return (mbc->bytes == 0);
166}
167
168/* Access the current character.  */
169static inline const char *
170mb_ptr (const mbchar_t mbc)
171{
172  return mbc->buf;
173}
174static inline size_t
175mb_len (const mbchar_t mbc)
176{
177  return mbc->bytes;
178}
179
180/* Comparison of characters.  */
181
182static inline bool
183mb_iseq (const mbchar_t mbc, char sc)
184{
185  /* Note: It is wrong to compare only mbc->uc, because when the encoding is
186     SHIFT_JIS, mbc->buf[0] == '\\' corresponds to mbc->uc == 0x00A5, but we
187     want to treat it as an escape character, although it looks like a Yen
188     sign.  */
189#if HAVE_ICONV && 0
190  if (mbc->uc_valid)
191    return (mbc->uc == sc); /* wrong! */
192  else
193#endif
194    return (mbc->bytes == 1 && mbc->buf[0] == sc);
195}
196
197static inline bool
198mb_isnul (const mbchar_t mbc)
199{
200#if HAVE_ICONV
201  if (mbc->uc_valid)
202    return (mbc->uc == 0);
203  else
204#endif
205    return (mbc->bytes == 1 && mbc->buf[0] == 0);
206}
207
208static inline int
209mb_cmp (const mbchar_t mbc1, const mbchar_t mbc2)
210{
211#if HAVE_ICONV
212  if (mbc1->uc_valid && mbc2->uc_valid)
213    return (int) mbc1->uc - (int) mbc2->uc;
214  else
215#endif
216    return (mbc1->bytes == mbc2->bytes
217	    ? memcmp (mbc1->buf, mbc2->buf, mbc1->bytes)
218	    : mbc1->bytes < mbc2->bytes
219	      ? (memcmp (mbc1->buf, mbc2->buf, mbc1->bytes) > 0 ? 1 : -1)
220	      : (memcmp (mbc1->buf, mbc2->buf, mbc2->bytes) >= 0 ? 1 : -1));
221}
222
223static inline bool
224mb_equal (const mbchar_t mbc1, const mbchar_t mbc2)
225{
226#if HAVE_ICONV
227  if (mbc1->uc_valid && mbc2->uc_valid)
228    return mbc1->uc == mbc2->uc;
229  else
230#endif
231    return (mbc1->bytes == mbc2->bytes
232	    && memcmp (mbc1->buf, mbc2->buf, mbc1->bytes) == 0);
233}
234
235/* <ctype.h>, <wctype.h> classification.  */
236
237static inline bool
238mb_isascii (const mbchar_t mbc)
239{
240#if HAVE_ICONV
241  if (mbc->uc_valid)
242    return (mbc->uc >= 0x0000 && mbc->uc <= 0x007F);
243  else
244#endif
245    return (mbc->bytes == 1
246#if CHAR_MIN < 0x00 /* to avoid gcc warning */
247	    && mbc->buf[0] >= 0x00
248#endif
249#if CHAR_MAX > 0x7F /* to avoid gcc warning */
250	    && mbc->buf[0] <= 0x7F
251#endif
252	   );
253}
254
255/* Extra <wchar.h> function.  */
256
257/* Unprintable characters appear as a small box of width 1.  */
258#define MB_UNPRINTABLE_WIDTH 1
259
260static int
261mb_width (const mbchar_t mbc)
262{
263#if HAVE_ICONV
264  if (mbc->uc_valid)
265    {
266      unsigned int uc = mbc->uc;
267      const char *encoding =
268	(po_lex_iconv != (iconv_t)(-1) ? po_lex_charset : "");
269      int w = uc_width (uc, encoding);
270      /* For unprintable characters, arbitrarily return 0 for control
271	 characters (except tab) and MB_UNPRINTABLE_WIDTH otherwise.  */
272      if (w >= 0)
273	return w;
274      if (uc >= 0x0000 && uc <= 0x001F)
275	{
276	  if (uc == 0x0009)
277	    return 8 - (gram_pos_column & 7);
278	  return 0;
279	}
280      if ((uc >= 0x007F && uc <= 0x009F) || (uc >= 0x2028 && uc <= 0x2029))
281	return 0;
282      return MB_UNPRINTABLE_WIDTH;
283    }
284  else
285#endif
286    {
287      if (mbc->bytes == 1)
288	{
289	  if (
290#if CHAR_MIN < 0x00 /* to avoid gcc warning */
291	      mbc->buf[0] >= 0x00 &&
292#endif
293	      mbc->buf[0] <= 0x1F)
294	    {
295	      if (mbc->buf[0] == 0x09)
296		return 8 - (gram_pos_column & 7);
297	      return 0;
298	    }
299	  if (mbc->buf[0] == 0x7F)
300	    return 0;
301	}
302      return MB_UNPRINTABLE_WIDTH;
303    }
304}
305
306/* Output.  */
307static inline void
308mb_putc (const mbchar_t mbc, FILE *stream)
309{
310  fwrite (mbc->buf, 1, mbc->bytes, stream);
311}
312
313/* Assignment.  */
314static inline void
315mb_setascii (mbchar_t mbc, char sc)
316{
317  mbc->bytes = 1;
318#if HAVE_ICONV
319  mbc->uc_valid = 1;
320  mbc->uc = sc;
321#endif
322  mbc->buf[0] = sc;
323}
324
325/* Copying a character.  */
326static inline void
327mb_copy (mbchar_t new_mbc, const mbchar_t old_mbc)
328{
329  memcpy_small (&new_mbc->buf[0], &old_mbc->buf[0], old_mbc->bytes);
330  new_mbc->bytes = old_mbc->bytes;
331#if HAVE_ICONV
332  if ((new_mbc->uc_valid = old_mbc->uc_valid))
333    new_mbc->uc = old_mbc->uc;
334#endif
335}
336
337
338/* Multibyte character input.  */
339
340/* Number of characters that can be pushed back.
341   We need 1 for lex_getc, plus 1 for lex_ungetc.  */
342#define NPUSHBACK 2
343
344/* Data type of a multibyte character input stream.  */
345struct mbfile
346{
347  FILE *fp;
348  bool eof_seen;
349  int have_pushback;
350  unsigned int bufcount;
351  char buf[MBCHAR_BUF_SIZE];
352  struct mbchar pushback[NPUSHBACK];
353};
354
355/* We want to pass multibyte streams by reference automatically,
356   therefore we use an array type.  */
357typedef struct mbfile mbfile_t[1];
358
/* Whether invalid multibyte sequences in the input shall be signalled
   (through po_gram_error) or silently tolerated.  The lexer switches this
   off while it reads over a comment.  */
static bool signal_eilseq;
362
363static inline void
364mbfile_init (mbfile_t mbf, FILE *stream)
365{
366  mbf->fp = stream;
367  mbf->eof_seen = false;
368  mbf->have_pushback = 0;
369  mbf->bufcount = 0;
370}
371
372/* Read the next multibyte character from mbf and put it into mbc.
373   If a read error occurs, errno is set and ferror (mbf->fp) becomes true.  */
374static void
375mbfile_getc (mbchar_t mbc, mbfile_t mbf)
376{
377  size_t bytes;
378
379  /* If EOF has already been seen, don't use getc.  This matters if
380     mbf->fp is connected to an interactive tty.  */
381  if (mbf->eof_seen)
382    goto eof;
383
384  /* Return character pushed back, if there is one.  */
385  if (mbf->have_pushback > 0)
386    {
387      mbf->have_pushback--;
388      mb_copy (mbc, &mbf->pushback[mbf->have_pushback]);
389      return;
390    }
391
392  /* Before using iconv, we need at least one byte.  */
393  if (mbf->bufcount == 0)
394    {
395      int c = getc (mbf->fp);
396      if (c == EOF)
397	{
398	  mbf->eof_seen = true;
399	  goto eof;
400	}
401      mbf->buf[0] = (unsigned char) c;
402      mbf->bufcount++;
403    }
404
405#if HAVE_ICONV
406  if (po_lex_iconv != (iconv_t)(-1))
407    {
408      /* Use iconv on an increasing number of bytes.  Read only as many
409	 bytes from mbf->fp as needed.  This is needed to give reasonable
410	 interactive behaviour when mbf->fp is connected to an interactive
411	 tty.  */
412      for (;;)
413	{
414	  unsigned char scratchbuf[64];
415	  const char *inptr = &mbf->buf[0];
416	  size_t insize = mbf->bufcount;
417	  char *outptr = (char *) &scratchbuf[0];
418	  size_t outsize = sizeof (scratchbuf);
419
420	  size_t res = iconv (po_lex_iconv,
421			      (ICONV_CONST char **) &inptr, &insize,
422			      &outptr, &outsize);
423	  /* We expect that a character has been produced if and only if
424	     some input bytes have been consumed.  */
425	  if ((insize < mbf->bufcount) != (outsize < sizeof (scratchbuf)))
426	    abort ();
427	  if (outsize == sizeof (scratchbuf))
428	    {
429	      /* No character has been produced.  Must be an error.  */
430	      if (res != (size_t)(-1))
431		abort ();
432
433	      if (errno == EILSEQ)
434		{
435		  /* An invalid multibyte sequence was encountered.  */
436		  /* Return a single byte.  */
437		  if (signal_eilseq)
438		    po_gram_error (_("invalid multibyte sequence"));
439		  bytes = 1;
440		  mbc->uc_valid = false;
441		  break;
442		}
443	      else if (errno == EINVAL)
444		{
445		  /* An incomplete multibyte character.  */
446		  int c;
447
448		  if (mbf->bufcount == MBCHAR_BUF_SIZE)
449		    {
450		      /* An overlong incomplete multibyte sequence was
451			 encountered.  */
452		      /* Return a single byte.  */
453		      bytes = 1;
454		      mbc->uc_valid = false;
455		      break;
456		    }
457
458		  /* Read one more byte and retry iconv.  */
459		  c = getc (mbf->fp);
460		  if (c == EOF)
461		    {
462		      mbf->eof_seen = true;
463		      if (ferror (mbf->fp))
464			goto eof;
465		      if (signal_eilseq)
466			po_gram_error (_("\
467incomplete multibyte sequence at end of file"));
468		      bytes = mbf->bufcount;
469		      mbc->uc_valid = false;
470		      break;
471		    }
472		  mbf->buf[mbf->bufcount++] = (unsigned char) c;
473		  if (c == '\n')
474		    {
475		      if (signal_eilseq)
476			po_gram_error (_("\
477incomplete multibyte sequence at end of line"));
478		      bytes = mbf->bufcount - 1;
479		      mbc->uc_valid = false;
480		      break;
481		    }
482		}
483	      else
484		{
485		  const char *errno_description = strerror (errno);
486		  po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
487			     xasprintf ("%s: %s",
488					_("iconv failure"),
489					errno_description));
490		}
491	    }
492	  else
493	    {
494	      size_t outbytes = sizeof (scratchbuf) - outsize;
495	      bytes = mbf->bufcount - insize;
496
497	      /* We expect that one character has been produced.  */
498	      if (bytes == 0)
499		abort ();
500	      if (outbytes == 0)
501		abort ();
502	      /* Convert it from UTF-8 to UCS-4.  */
503	      if (u8_mbtouc (&mbc->uc, scratchbuf, outbytes) < outbytes)
504		{
505		  /* scratchbuf contains an out-of-range Unicode character
506		     (> 0x10ffff).  */
507		  if (signal_eilseq)
508		    po_gram_error (_("invalid multibyte sequence"));
509		  mbc->uc_valid = false;
510		  break;
511		}
512	      mbc->uc_valid = true;
513	      break;
514	    }
515	}
516    }
517  else
518#endif
519    {
520      if (po_lex_weird_cjk
521	  /* Special handling of encodings with CJK structure.  */
522	  && (unsigned char) mbf->buf[0] >= 0x80)
523	{
524	  if (mbf->bufcount == 1)
525	    {
526	      /* Read one more byte.  */
527	      int c = getc (mbf->fp);
528	      if (c == EOF)
529		{
530		  if (ferror (mbf->fp))
531		    {
532		      mbf->eof_seen = true;
533		      goto eof;
534		    }
535		}
536	      else
537		{
538		  mbf->buf[1] = (unsigned char) c;
539		  mbf->bufcount++;
540		}
541	    }
542	  if (mbf->bufcount >= 2 && (unsigned char) mbf->buf[1] >= 0x30)
543	    /* Return a double byte.  */
544	    bytes = 2;
545	  else
546	    /* Return a single byte.  */
547	    bytes = 1;
548	}
549      else
550	{
551	  /* Return a single byte.  */
552	  bytes = 1;
553	}
554#if HAVE_ICONV
555      mbc->uc_valid = false;
556#endif
557    }
558
559  /* Return the multibyte sequence mbf->buf[0..bytes-1].  */
560  memcpy_small (&mbc->buf[0], &mbf->buf[0], bytes);
561  mbc->bytes = bytes;
562
563  mbf->bufcount -= bytes;
564  if (mbf->bufcount > 0)
565    {
566      /* It's not worth calling memmove() for so few bytes.  */
567      unsigned int count = mbf->bufcount;
568      char *p = &mbf->buf[0];
569
570      do
571	{
572	  *p = *(p + bytes);
573	  p++;
574	}
575      while (--count > 0);
576    }
577  return;
578
579eof:
580  /* An mbchar_t with bytes == 0 is used to indicate EOF.  */
581  mbc->bytes = 0;
582#if HAVE_ICONV
583  mbc->uc_valid = false;
584#endif
585  return;
586}
587
588static void
589mbfile_ungetc (const mbchar_t mbc, mbfile_t mbf)
590{
591  if (mbf->have_pushback >= NPUSHBACK)
592    abort ();
593  mb_copy (&mbf->pushback[mbf->have_pushback], mbc);
594  mbf->have_pushback++;
595}
596
597
598/* Lexer variables.  */
599
600static mbfile_t mbf;
601unsigned int gram_max_allowed_errors = 20;
602static bool po_lex_obsolete;
603static bool po_lex_previous;
604static bool pass_comments = false;
605bool pass_obsolete_entries = false;
606
607
608/* Prepare lexical analysis.  */
609void
610lex_start (FILE *fp, const char *real_filename, const char *logical_filename)
611{
612  /* Ignore the logical_filename, because PO file entries already have
613     their file names attached.  But use real_filename for error messages.  */
614  gram_pos.file_name = xstrdup (real_filename);
615
616  mbfile_init (mbf, fp);
617
618  gram_pos.line_number = 1;
619  gram_pos_column = 0;
620  signal_eilseq = true;
621  po_lex_obsolete = false;
622  po_lex_previous = false;
623  po_lex_charset_init ();
624}
625
626/* Terminate lexical analysis.  */
627void
628lex_end ()
629{
630  mbf->fp = NULL;
631  gram_pos.file_name = NULL;
632  gram_pos.line_number = 0;
633  gram_pos_column = 0;
634  signal_eilseq = false;
635  po_lex_obsolete = false;
636  po_lex_previous = false;
637  po_lex_charset_close ();
638}
639
640
641/* Read a single character, dealing with backslash-newline.
642   Also keep track of the current line number and column number.  */
643static void
644lex_getc (mbchar_t mbc)
645{
646  for (;;)
647    {
648      mbfile_getc (mbc, mbf);
649
650      if (mb_iseof (mbc))
651	{
652	  if (ferror (mbf->fp))
653	   bomb:
654	    {
655	      const char *errno_description = strerror (errno);
656	      po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
657			 xasprintf ("%s: %s",
658				    xasprintf (_("error while reading \"%s\""),
659					       gram_pos.file_name),
660				    errno_description));
661	    }
662	  break;
663	}
664
665      if (mb_iseq (mbc, '\n'))
666	{
667	  gram_pos.line_number++;
668	  gram_pos_column = 0;
669	  break;
670	}
671
672      gram_pos_column += mb_width (mbc);
673
674      if (mb_iseq (mbc, '\\'))
675	{
676	  mbchar_t mbc2;
677
678	  mbfile_getc (mbc2, mbf);
679
680	  if (mb_iseof (mbc2))
681	    {
682	      if (ferror (mbf->fp))
683		goto bomb;
684	      break;
685	    }
686
687	  if (!mb_iseq (mbc2, '\n'))
688	    {
689	      mbfile_ungetc (mbc2, mbf);
690	      break;
691	    }
692
693	  gram_pos.line_number++;
694	  gram_pos_column = 0;
695	}
696      else
697	break;
698    }
699}
700
701
702static void
703lex_ungetc (const mbchar_t mbc)
704{
705  if (!mb_iseof (mbc))
706    {
707      if (mb_iseq (mbc, '\n'))
708	/* Decrement the line number, but don't care about the column.  */
709	gram_pos.line_number--;
710      else
711	/* Decrement the column number.  Also works well enough for tabs.  */
712	gram_pos_column -= mb_width (mbc);
713
714      mbfile_ungetc (mbc, mbf);
715    }
716}
717
718
719static int
720keyword_p (const char *s)
721{
722  if (!po_lex_previous)
723    {
724      if (!strcmp (s, "domain"))
725	return DOMAIN;
726      if (!strcmp (s, "msgid"))
727	return MSGID;
728      if (!strcmp (s, "msgid_plural"))
729	return MSGID_PLURAL;
730      if (!strcmp (s, "msgstr"))
731	return MSGSTR;
732      if (!strcmp (s, "msgctxt"))
733	return MSGCTXT;
734    }
735  else
736    {
737      /* Inside a "#|" context, the keywords have a different meaning.  */
738      if (!strcmp (s, "msgid"))
739	return PREV_MSGID;
740      if (!strcmp (s, "msgid_plural"))
741	return PREV_MSGID_PLURAL;
742      if (!strcmp (s, "msgctxt"))
743	return PREV_MSGCTXT;
744    }
745  po_gram_error_at_line (&gram_pos, _("keyword \"%s\" unknown"), s);
746  return NAME;
747}
748
749
750static int
751control_sequence ()
752{
753  mbchar_t mbc;
754  int val;
755  int max;
756
757  lex_getc (mbc);
758  if (mb_len (mbc) == 1)
759    switch (mb_ptr (mbc) [0])
760      {
761      case 'n':
762	return '\n';
763
764      case 't':
765	return '\t';
766
767      case 'b':
768	return '\b';
769
770      case 'r':
771	return '\r';
772
773      case 'f':
774	return '\f';
775
776      case 'v':
777	return '\v';
778
779      case 'a':
780	return '\a';
781
782      case '\\':
783      case '"':
784	return mb_ptr (mbc) [0];
785
786      case '0': case '1': case '2': case '3':
787      case '4': case '5': case '6': case '7':
788	val = 0;
789	max = 0;
790	for (;;)
791	  {
792	    char c = mb_ptr (mbc) [0];
793	    /* Warning: not portable, can't depend on '0'..'7' ordering.  */
794	    val = val * 8 + (c - '0');
795	    if (++max == 3)
796	      break;
797	    lex_getc (mbc);
798	    if (mb_len (mbc) == 1)
799	      switch (mb_ptr (mbc) [0])
800		{
801		case '0': case '1': case '2': case '3':
802		case '4': case '5': case '6': case '7':
803		  continue;
804
805		default:
806		  break;
807		}
808	    lex_ungetc (mbc);
809	    break;
810	  }
811	return val;
812
813      case 'x':
814	lex_getc (mbc);
815	if (mb_iseof (mbc) || mb_len (mbc) != 1
816	    || !c_isxdigit (mb_ptr (mbc) [0]))
817	  break;
818
819	val = 0;
820	for (;;)
821	  {
822	    char c = mb_ptr (mbc) [0];
823	    val *= 16;
824	    if (c_isdigit (c))
825	      /* Warning: not portable, can't depend on '0'..'9' ordering */
826	      val += c - '0';
827	    else if (c_isupper (c))
828	      /* Warning: not portable, can't depend on 'A'..'F' ordering */
829	      val += c - 'A' + 10;
830	    else
831	      /* Warning: not portable, can't depend on 'a'..'f' ordering */
832	      val += c - 'a' + 10;
833
834	    lex_getc (mbc);
835	    if (mb_len (mbc) == 1)
836	      switch (mb_ptr (mbc) [0])
837		{
838		case '0': case '1': case '2': case '3': case '4':
839		case '5': case '6': case '7': case '8': case '9':
840		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
841		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
842		  continue;
843
844		default:
845		  break;
846		}
847	    lex_ungetc (mbc);
848	    break;
849	  }
850	return val;
851
852      /* FIXME: \u and \U are not handled.  */
853      }
854  lex_ungetc (mbc);
855  po_gram_error (_("invalid control sequence"));
856  return ' ';
857}
858
859
860/* Return the next token in the PO file.  The return codes are defined
861   in "po-gram-gen2.h".  Associated data is put in 'po_gram_lval'.  */
862int
863po_gram_lex ()
864{
865  static char *buf;
866  static size_t bufmax;
867  mbchar_t mbc;
868  size_t bufpos;
869
870  for (;;)
871    {
872      lex_getc (mbc);
873
874      if (mb_iseof (mbc))
875	/* Yacc want this for end of file.  */
876	return 0;
877
878      if (mb_len (mbc) == 1)
879	switch (mb_ptr (mbc) [0])
880	  {
881	  case '\n':
882	    po_lex_obsolete = false;
883	    po_lex_previous = false;
884	    /* Ignore whitespace, not relevant for the grammar.  */
885	    break;
886
887	  case ' ':
888	  case '\t':
889	  case '\r':
890	  case '\f':
891	  case '\v':
892	    /* Ignore whitespace, not relevant for the grammar.  */
893	    break;
894
895	  case '#':
896	    lex_getc (mbc);
897	    if (mb_iseq (mbc, '~'))
898	      /* A pseudo-comment beginning with #~ is found.  This is
899		 not a comment.  It is the format for obsolete entries.
900		 We simply discard the "#~" prefix.  The following
901		 characters are expected to be well formed.  */
902	      {
903		po_lex_obsolete = true;
904		/* A pseudo-comment beginning with #~| denotes a previous
905		   untranslated string in an obsolete entry.  This does not
906		   make much sense semantically, and is implemented here
907		   for completeness only.  */
908		lex_getc (mbc);
909		if (mb_iseq (mbc, '|'))
910		  po_lex_previous = true;
911		else
912		  lex_ungetc (mbc);
913		break;
914	      }
915	    if (mb_iseq (mbc, '|'))
916	      /* A pseudo-comment beginning with #| is found.  This is
917		 the previous untranslated string.  We discard the "#|"
918		 prefix, but change the keywords and string returns
919		 accordingly.  */
920	      {
921		po_lex_previous = true;
922		break;
923	      }
924
925	    /* Accumulate comments into a buffer.  If we have been asked
926	       to pass comments, generate a COMMENT token, otherwise
927	       discard it.  */
928	    signal_eilseq = false;
929	    if (pass_comments)
930	      {
931		bufpos = 0;
932		for (;;)
933		  {
934		    while (bufpos + mb_len (mbc) >= bufmax)
935		      {
936			bufmax += 100;
937			buf = xrealloc (buf, bufmax);
938		      }
939		    if (mb_iseof (mbc) || mb_iseq (mbc, '\n'))
940		      break;
941
942		    memcpy_small (&buf[bufpos], mb_ptr (mbc), mb_len (mbc));
943		    bufpos += mb_len (mbc);
944
945		    lex_getc (mbc);
946		  }
947		buf[bufpos] = '\0';
948
949		po_gram_lval.string.string = buf;
950		po_gram_lval.string.pos = gram_pos;
951		po_gram_lval.string.obsolete = po_lex_obsolete;
952		po_lex_obsolete = false;
953		signal_eilseq = true;
954		return COMMENT;
955	      }
956	    else
957	      {
958		/* We do this in separate loop because collecting large
959		   comments while they get not passed to the upper layers
960		   is not very efficient.  */
961		while (!mb_iseof (mbc) && !mb_iseq (mbc, '\n'))
962		  lex_getc (mbc);
963		po_lex_obsolete = false;
964		signal_eilseq = true;
965	      }
966	    break;
967
968	  case '"':
969	    /* Accumulate a string.  */
970	    bufpos = 0;
971	    for (;;)
972	      {
973		lex_getc (mbc);
974		while (bufpos + mb_len (mbc) >= bufmax)
975		  {
976		    bufmax += 100;
977		    buf = xrealloc (buf, bufmax);
978		  }
979		if (mb_iseof (mbc))
980		  {
981		    po_gram_error_at_line (&gram_pos,
982					   _("end-of-file within string"));
983		    break;
984		  }
985		if (mb_iseq (mbc, '\n'))
986		  {
987		    po_gram_error_at_line (&gram_pos,
988					   _("end-of-line within string"));
989		    break;
990		  }
991		if (mb_iseq (mbc, '"'))
992		  break;
993		if (mb_iseq (mbc, '\\'))
994		  {
995		    buf[bufpos++] = control_sequence ();
996		    continue;
997		  }
998
999		/* Add mbc to the accumulator.  */
1000		memcpy_small (&buf[bufpos], mb_ptr (mbc), mb_len (mbc));
1001		bufpos += mb_len (mbc);
1002	      }
1003	    buf[bufpos] = '\0';
1004
1005	    /* Strings cannot contain the msgctxt separator, because it cannot
1006	       be faithfully represented in the msgid of a .mo file.  */
1007	    if (strchr (buf, MSGCTXT_SEPARATOR) != NULL)
1008	      po_gram_error_at_line (&gram_pos,
1009				     _("context separator <EOT> within string"));
1010
1011	    /* FIXME: Treatment of embedded \000 chars is incorrect.  */
1012	    po_gram_lval.string.string = xstrdup (buf);
1013	    po_gram_lval.string.pos = gram_pos;
1014	    po_gram_lval.string.obsolete = po_lex_obsolete;
1015	    return (po_lex_previous ? PREV_STRING : STRING);
1016
1017	  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1018	  case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1019	  case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1020	  case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1021	  case 'y': case 'z':
1022	  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1023	  case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1024	  case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1025	  case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1026	  case 'Y': case 'Z':
1027	  case '_': case '$':
1028	    bufpos = 0;
1029	    for (;;)
1030	      {
1031		char c = mb_ptr (mbc) [0];
1032		if (bufpos + 1 >= bufmax)
1033		  {
1034		    bufmax += 100;
1035		    buf = xrealloc (buf, bufmax);
1036		  }
1037		buf[bufpos++] = c;
1038		lex_getc (mbc);
1039		if (mb_len (mbc) == 1)
1040		  switch (mb_ptr (mbc) [0])
1041		    {
1042		    default:
1043		      break;
1044		    case 'a': case 'b': case 'c': case 'd': case 'e':
1045		    case 'f': case 'g': case 'h': case 'i': case 'j':
1046		    case 'k': case 'l': case 'm': case 'n': case 'o':
1047		    case 'p': case 'q': case 'r': case 's': case 't':
1048		    case 'u': case 'v': case 'w': case 'x': case 'y':
1049		    case 'z':
1050		    case 'A': case 'B': case 'C': case 'D': case 'E':
1051		    case 'F': case 'G': case 'H': case 'I': case 'J':
1052		    case 'K': case 'L': case 'M': case 'N': case 'O':
1053		    case 'P': case 'Q': case 'R': case 'S': case 'T':
1054		    case 'U': case 'V': case 'W': case 'X': case 'Y':
1055		    case 'Z':
1056		    case '_': case '$':
1057		    case '0': case '1': case '2': case '3': case '4':
1058		    case '5': case '6': case '7': case '8': case '9':
1059		      continue;
1060		    }
1061		break;
1062	      }
1063	    lex_ungetc (mbc);
1064
1065	    buf[bufpos] = '\0';
1066
1067	    {
1068	      int k = keyword_p (buf);
1069	      if (k == NAME)
1070		{
1071		  po_gram_lval.string.string = xstrdup (buf);
1072		  po_gram_lval.string.pos = gram_pos;
1073		  po_gram_lval.string.obsolete = po_lex_obsolete;
1074		}
1075	      else
1076		{
1077		  po_gram_lval.pos.pos = gram_pos;
1078		  po_gram_lval.pos.obsolete = po_lex_obsolete;
1079		}
1080	      return k;
1081	    }
1082
1083	  case '0': case '1': case '2': case '3': case '4':
1084	  case '5': case '6': case '7': case '8': case '9':
1085	    bufpos = 0;
1086	    for (;;)
1087	      {
1088		char c = mb_ptr (mbc) [0];
1089		if (bufpos + 1 >= bufmax)
1090		  {
1091		    bufmax += 100;
1092		    buf = xrealloc (buf, bufmax + 1);
1093		  }
1094		buf[bufpos++] = c;
1095		lex_getc (mbc);
1096		if (mb_len (mbc) == 1)
1097		  switch (mb_ptr (mbc) [0])
1098		    {
1099		    default:
1100		      break;
1101
1102		    case '0': case '1': case '2': case '3': case '4':
1103		    case '5': case '6': case '7': case '8': case '9':
1104		      continue;
1105		    }
1106		break;
1107	      }
1108	    lex_ungetc (mbc);
1109
1110	    buf[bufpos] = '\0';
1111
1112	    po_gram_lval.number.number = atol (buf);
1113	    po_gram_lval.number.pos = gram_pos;
1114	    po_gram_lval.number.obsolete = po_lex_obsolete;
1115	    return NUMBER;
1116
1117	  case '[':
1118	    po_gram_lval.pos.pos = gram_pos;
1119	    po_gram_lval.pos.obsolete = po_lex_obsolete;
1120	    return '[';
1121
1122	  case ']':
1123	    po_gram_lval.pos.pos = gram_pos;
1124	    po_gram_lval.pos.obsolete = po_lex_obsolete;
1125	    return ']';
1126
1127	  default:
1128	    /* This will cause a syntax error.  */
1129	    return JUNK;
1130	  }
1131      else
1132	/* This will cause a syntax error.  */
1133	return JUNK;
1134    }
1135}
1136
1137
1138/* po_gram_lex() can return comments as COMMENT.  Switch this on or off.  */
1139void
1140po_lex_pass_comments (bool flag)
1141{
1142  pass_comments = flag;
1143}
1144
1145
1146/* po_gram_lex() can return obsolete entries as if they were normal entries.
1147   Switch this on or off.  */
1148void
1149po_lex_pass_obsolete_entries (bool flag)
1150{
1151  pass_obsolete_entries = flag;
1152}
1153