1/* Reading PO files, abstract class.
2   Copyright (C) 1995-1996, 1998, 2000-2005 Free Software Foundation, Inc.
3
4   This file was written by Peter Miller <millerp@canb.auug.org.au>
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
19
20
21#ifdef HAVE_CONFIG_H
22# include "config.h"
23#endif
24
25/* Specification.  */
26#include "read-po-abstract.h"
27
28#include <stdlib.h>
29#include <string.h>
30
31#include "po-gram.h"
32#include "read-properties.h"
33#include "read-stringtable.h"
34#include "xalloc.h"
35#include "gettext.h"
36
37/* Local variables.  */
38static abstract_po_reader_ty *callback_arg;
39
40
41/* ========================================================================= */
42/* Allocating and freeing instances of abstract_po_reader_ty.  */
43
44
45abstract_po_reader_ty *
46po_reader_alloc (abstract_po_reader_class_ty *method_table)
47{
48  abstract_po_reader_ty *pop;
49
50  pop = (abstract_po_reader_ty *) xmalloc (method_table->size);
51  pop->methods = method_table;
52  if (method_table->constructor)
53    method_table->constructor (pop);
54  return pop;
55}
56
57
58void
59po_reader_free (abstract_po_reader_ty *pop)
60{
61  if (pop->methods->destructor)
62    pop->methods->destructor (pop);
63  free (pop);
64}
65
66
67/* ========================================================================= */
68/* Inline functions to invoke the methods.  */
69
70
71static inline void
72call_parse_brief (abstract_po_reader_ty *pop)
73{
74  if (pop->methods->parse_brief)
75    pop->methods->parse_brief (pop);
76}
77
78static inline void
79call_parse_debrief (abstract_po_reader_ty *pop)
80{
81  if (pop->methods->parse_debrief)
82    pop->methods->parse_debrief (pop);
83}
84
85static inline void
86call_directive_domain (abstract_po_reader_ty *pop, char *name)
87{
88  if (pop->methods->directive_domain)
89    pop->methods->directive_domain (pop, name);
90}
91
92static inline void
93call_directive_message (abstract_po_reader_ty *pop,
94			char *msgid,
95			lex_pos_ty *msgid_pos,
96			char *msgid_plural,
97			char *msgstr, size_t msgstr_len,
98			lex_pos_ty *msgstr_pos,
99			bool force_fuzzy, bool obsolete)
100{
101  if (pop->methods->directive_message)
102    pop->methods->directive_message (pop, msgid, msgid_pos, msgid_plural,
103				     msgstr, msgstr_len, msgstr_pos,
104				     force_fuzzy, obsolete);
105}
106
107static inline void
108call_comment (abstract_po_reader_ty *pop, const char *s)
109{
110  if (pop->methods->comment != NULL)
111    pop->methods->comment (pop, s);
112}
113
114static inline void
115call_comment_dot (abstract_po_reader_ty *pop, const char *s)
116{
117  if (pop->methods->comment_dot != NULL)
118    pop->methods->comment_dot (pop, s);
119}
120
121static inline void
122call_comment_filepos (abstract_po_reader_ty *pop, const char *name, size_t line)
123{
124  if (pop->methods->comment_filepos)
125    pop->methods->comment_filepos (pop, name, line);
126}
127
128static inline void
129call_comment_special (abstract_po_reader_ty *pop, const char *s)
130{
131  if (pop->methods->comment_special != NULL)
132    pop->methods->comment_special (pop, s);
133}
134
135
136/* ========================================================================= */
/* Scanning a file: the exported po_scan and its file-local helpers.  */
138
139
140static inline void
141po_scan_start (abstract_po_reader_ty *pop)
142{
143  /* The parse will call the po_callback_... functions (see below)
144     when the various directive are recognised.  The callback_arg
145     variable is used to tell these functions which instance is to
146     have the relevant method invoked.  */
147  callback_arg = pop;
148
149  call_parse_brief (pop);
150}
151
152static inline void
153po_scan_end (abstract_po_reader_ty *pop)
154{
155  call_parse_debrief (pop);
156  callback_arg = NULL;
157}
158
159
160void
161po_scan (abstract_po_reader_ty *pop, FILE *fp,
162	 const char *real_filename, const char *logical_filename,
163	 input_syntax_ty syntax)
164{
165  /* Parse the stream's content.  */
166  switch (syntax)
167    {
168    case syntax_po:
169      lex_start (fp, real_filename, logical_filename);
170      po_scan_start (pop);
171      po_gram_parse ();
172      po_scan_end (pop);
173      lex_end ();
174      break;
175    case syntax_properties:
176      po_scan_start (pop);
177      properties_parse (pop, fp, real_filename, logical_filename);
178      po_scan_end (pop);
179      break;
180    case syntax_stringtable:
181      po_scan_start (pop);
182      stringtable_parse (pop, fp, real_filename, logical_filename);
183      po_scan_end (pop);
184      break;
185    default:
186      abort ();
187    }
188
189  if (error_message_count > 0)
190    po_error (EXIT_FAILURE, 0,
191	      ngettext ("found %d fatal error", "found %d fatal errors",
192			error_message_count),
193	      error_message_count);
194  error_message_count = 0;
195}
196
197
198/* ========================================================================= */
199/* Callbacks used by po-gram.y or po-lex.c, indirectly from po_scan.  */
200
201
202/* This function is called by po_gram_lex() whenever a domain directive
203   has been seen.  */
204void
205po_callback_domain (char *name)
206{
207  /* assert(callback_arg); */
208  call_directive_domain (callback_arg, name);
209}
210
211
212/* This function is called by po_gram_lex() whenever a message has been
213   seen.  */
214void
215po_callback_message (char *msgid, lex_pos_ty *msgid_pos, char *msgid_plural,
216		     char *msgstr, size_t msgstr_len, lex_pos_ty *msgstr_pos,
217		     bool force_fuzzy, bool obsolete)
218{
219  /* assert(callback_arg); */
220  call_directive_message (callback_arg, msgid, msgid_pos, msgid_plural,
221			  msgstr, msgstr_len, msgstr_pos,
222			  force_fuzzy, obsolete);
223}
224
225
226void
227po_callback_comment (const char *s)
228{
229  /* assert(callback_arg); */
230  call_comment (callback_arg, s);
231}
232
233
234void
235po_callback_comment_dot (const char *s)
236{
237  /* assert(callback_arg); */
238  call_comment_dot (callback_arg, s);
239}
240
241
242/* This function is called by po_parse_comment_filepos(), once for each
243   filename.  */
244void
245po_callback_comment_filepos (const char *name, size_t line)
246{
247  /* assert(callback_arg); */
248  call_comment_filepos (callback_arg, name, line);
249}
250
251
252void
253po_callback_comment_special (const char *s)
254{
255  /* assert(callback_arg); */
256  call_comment_special (callback_arg, s);
257}
258
259
260/* Parse a special comment and put the result in *fuzzyp, formatp, *wrapp.  */
261void
262po_parse_comment_special (const char *s,
263			  bool *fuzzyp, enum is_format formatp[NFORMATS],
264			  enum is_wrap *wrapp)
265{
266  size_t i;
267
268  *fuzzyp = false;
269  for (i = 0; i < NFORMATS; i++)
270    formatp[i] = undecided;
271  *wrapp = undecided;
272
273  while (*s != '\0')
274    {
275      const char *t;
276
277      /* Skip whitespace.  */
278      while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
279	s++;
280
281      /* Collect a token.  */
282      t = s;
283      while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
284	s++;
285      if (s != t)
286	{
287	  size_t len = s - t;
288
289	  /* Accept fuzzy flag.  */
290	  if (len == 5 && memcmp (t, "fuzzy", 5) == 0)
291	    {
292	      *fuzzyp = true;
293	      continue;
294	    }
295
296	  /* Accept format description.  */
297	  if (len >= 7 && memcmp (t + len - 7, "-format", 7) == 0)
298	    {
299	      const char *p;
300	      size_t n;
301	      enum is_format value;
302
303	      p = t;
304	      n = len - 7;
305
306	      if (n >= 3 && memcmp (p, "no-", 3) == 0)
307		{
308		  p += 3;
309		  n -= 3;
310		  value = no;
311		}
312	      else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
313		{
314		  p += 9;
315		  n -= 9;
316		  value = possible;
317		}
318	      else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
319		{
320		  p += 11;
321		  n -= 11;
322		  value = impossible;
323		}
324	      else
325		value = yes;
326
327	      for (i = 0; i < NFORMATS; i++)
328		if (strlen (format_language[i]) == n
329		    && memcmp (format_language[i], p, n) == 0)
330		  {
331		    formatp[i] = value;
332		    break;
333		  }
334	      if (i < NFORMATS)
335		continue;
336	    }
337
338	  /* Accept wrap description.  */
339	  if (len == 4 && memcmp (t, "wrap", 4) == 0)
340	    {
341	      *wrapp = yes;
342	      continue;
343	    }
344	  if (len == 7 && memcmp (t, "no-wrap", 7) == 0)
345	    {
346	      *wrapp = no;
347	      continue;
348	    }
349
350	  /* Unknown special comment marker.  It may have been generated
351	     from a future xgettext version.  Ignore it.  */
352	}
353    }
354}
355
356
/* Helper of po_parse_comment_filepos: true if C separates entries.  */
static inline bool
filepos_is_blank (char c)
{
  return c == ' ' || c == '\t' || c == '\n';
}

/* Helper of po_parse_comment_filepos: true if C is a decimal digit.  */
static inline bool
filepos_is_digit (char c)
{
  return c >= '0' && c <= '9';
}

/* Helper of po_parse_comment_filepos: read the run of decimal digits at
   *CURSOR, which must start with a digit, advance *CURSOR past the run
   and return its value.  */
static size_t
filepos_scan_number (const char **cursor)
{
  const char *p = *cursor;
  size_t value = 0;

  do
    {
      value = value * 10 + (*p - '0');
      p++;
    }
  while (filepos_is_digit (*p));

  *cursor = p;
  return value;
}

/* Helper of po_parse_comment_filepos: call po_callback_comment_filepos
   with a NUL-terminated temporary copy of the characters in [START, END)
   and with LINE.  */
static void
filepos_report (const char *start, const char *end, size_t line)
{
  size_t length = end - start;
  char *name = (char *) xmalloc (length + 1);

  memcpy (name, start, length);
  name[length] = '\0';

  po_callback_comment_filepos (name, line);

  free (name);
}

/* Parse a GNU style file comment.
   Syntax: an arbitrary number of
             STRING COLON NUMBER
           or
             STRING
   The latter style, without line number, occurs in PO files converted e.g.
   from Pascal .rst files or from OpenOffice resource files.
   Call po_callback_comment_filepos for each of them; an entry without a
   line number is reported with the line (size_t)(-1).  */
static void
po_parse_comment_filepos (const char *s)
{
  for (;;)
    {
      const char *name_start;
      const char *q;

      /* Find the next STRING, if any.  */
      while (filepos_is_blank (*s))
        s++;
      if (*s == '\0')
        break;

      /* STRING runs up to the next blank or the end of S.  */
      name_start = s;
      do
        s++;
      while (*s != '\0' && !filepos_is_blank (*s));

      /* Form 1: a COLON and NUMBER follow the STRING, separated from it
         through optional spaces.  */
      q = s;
      while (filepos_is_blank (*q))
        q++;
      if (*q == ':')
        {
          q++;
          while (filepos_is_blank (*q))
            q++;

          if (filepos_is_digit (*q))
            {
              size_t line = filepos_scan_number (&q);

              if (*q == '\0' || filepos_is_blank (*q))
                {
                  filepos_report (name_start, s, line);
                  s = q;
                  continue;
                }
            }
        }

      /* Form 2: the STRING ends in a COLON and a NUMBER follows, separated
         through optional spaces.  The COLON is not part of the name.  */
      if (s[-1] == ':')
        {
          q = s;
          while (filepos_is_blank (*q))
            q++;

          if (filepos_is_digit (*q))
            {
              size_t line = filepos_scan_number (&q);

              if (*q == '\0' || filepos_is_blank (*q))
                {
                  filepos_report (name_start, s - 1, line);
                  s = q;
                  continue;
                }
            }
        }

      /* Form 3: COLON and NUMBER at the end of the STRING, without
         separating spaces.  Walk back over the trailing digits.  */
      q = s;
      while (q > name_start && filepos_is_digit (q[-1]))
        q--;

      /* q now points to the beginning of the trailing digits segment.
         Require at least one digit, a COLON right before the digits and
         at least one character in front of the COLON.  */
      if (q < s && q > name_start + 1 && q[-1] == ':')
        {
          const char *name_end = q - 1;
          size_t line = filepos_scan_number (&q);

          filepos_report (name_start, name_end, line);
          continue;
        }

      /* Form 4: a file comment without line number.  */
      filepos_report (name_start, s, (size_t)(-1));
    }
}
538
539
/* Parse a SunOS or Solaris style file comment.
   Syntax of SunOS style:
     FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD COLON NUMBER
   Syntax of Solaris style:
     FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD NUMBER_KEYWORD COLON NUMBER
   where
     FILE_KEYWORD ::= "file" | "File"
     COLON ::= ":"
     COMMA ::= ","
     LINE_KEYWORD ::= "line"
     NUMBER_KEYWORD ::= "number"
     NUMBER ::= [0-9]+
   S must start with a single space, immediately followed by FILE_KEYWORD
   and COLON.  From there on, spaces and tabs are accepted between any two
   tokens, including between the last COLON and NUMBER (as in
   " File: foo.c, line: 12").  After NUMBER only spaces, tabs and newlines
   may follow.
   On success, po_callback_comment_filepos is called with a copy of STRING
   and the value of NUMBER.
   Return true if parsed, false if not a comment of this form. */
static bool
po_parse_comment_solaris_filepos (const char *s)
{
  const char *string_start;
  const char *string_end;

  /* FILE_KEYWORD COLON.  The && chain stops at the first mismatch, so no
     character behind the terminating NUL is examined.  */
  if (!(s[0] == ' '
        && (s[1] == 'F' || s[1] == 'f')
        && s[2] == 'i' && s[3] == 'l' && s[4] == 'e'
        && s[5] == ':'))
    return false;

  string_start = s + 6;
  while (*string_start == ' ' || *string_start == '\t')
    string_start++;

  /* STRING has no delimiter of its own: try every possible end position
     and take the first one after which the rest of S matches
     COMMA LINE_KEYWORD [NUMBER_KEYWORD] COLON NUMBER.  */
  for (string_end = string_start; *string_end != '\0'; string_end++)
    {
      const char *p = string_end;
      size_t n;

      /* COMMA.  */
      while (*p == ' ' || *p == '\t')
        p++;
      if (*p != ',')
        continue;
      p++;

      /* LINE_KEYWORD.  */
      while (*p == ' ' || *p == '\t')
        p++;
      if (!(p[0] == 'l' && p[1] == 'i' && p[2] == 'n' && p[3] == 'e'))
        continue;
      p += 4;

      /* Optional NUMBER_KEYWORD (Solaris style).  */
      while (*p == ' ' || *p == '\t')
        p++;
      if (p[0] == 'n' && p[1] == 'u' && p[2] == 'm'
          && p[3] == 'b' && p[4] == 'e' && p[5] == 'r')
        {
          p += 6;
          while (*p == ' ' || *p == '\t')
            p++;
        }

      /* COLON.  */
      if (*p != ':')
        continue;
      p++;

      /* Spaces and tabs may separate COLON from NUMBER, like any other
         pair of tokens.  */
      while (*p == ' ' || *p == '\t')
        p++;

      /* NUMBER.  */
      if (!(*p >= '0' && *p <= '9'))
        continue;
      n = 0;
      do
        {
          n = n * 10 + (*p - '0');
          p++;
        }
      while (*p >= '0' && *p <= '9');

      /* Nothing but whitespace may follow.  */
      while (*p == ' ' || *p == '\t' || *p == '\n')
        p++;
      if (*p != '\0')
        continue;

      /* Parsed a Sun style file comment.  */
      {
        size_t string_length = string_end - string_start;
        char *string = (char *) xmalloc (string_length + 1);

        memcpy (string, string_start, string_length);
        string[string_length] = '\0';

        po_callback_comment_filepos (string, n);

        free (string);
      }
      return true;
    }

  return false;
}
644
645
/* This function is called by po_gram_lex() whenever a comment is
   seen.  It looks at the first character of the comment S to see what
   sort it is, and then dispatches it:
     '.'          the rest goes to po_callback_comment_dot;
     ':'          the rest goes to po_parse_comment_filepos, which invokes
                  po_callback_comment_filepos;
     ',' or '!'   the rest goes to po_callback_comment_special;
     otherwise    the whole of S is tried as a Sun-style file position
                  line and, failing that, goes to po_callback_comment.  */
void
po_callback_comment_dispatcher (const char *s)
{
  switch (*s)
    {
    case '.':
      po_callback_comment_dot (s + 1);
      break;

    case ':':
      /* Parse the file location string.  The appropriate callback will be
         invoked.  */
      po_parse_comment_filepos (s + 1);
      break;

    case ',':
    case '!':
      /* Get all entries in the special comment line.  */
      po_callback_comment_special (s + 1);
      break;

    default:
      /* It looks like a plain vanilla comment, but Solaris-style file
         position lines do, too.  Try to parse the lot.  If the parse
         succeeds, the appropriate callback has already been invoked and
         nothing is left to do.  */
      if (!po_parse_comment_solaris_filepos (s))
        po_callback_comment (s);
      break;
    }
}
678