1/* Reading binary .mo files.
2   Copyright (C) 1995-1998, 2000-2006 Free Software Foundation, Inc.
3   Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23/* Specification.  */
24#include "read-mo.h"
25
26#include <errno.h>
27#include <stdbool.h>
28#include <stdio.h>
29#include <stddef.h>
30#include <stdlib.h>
31#include <string.h>
32
33/* This include file describes the main part of binary .mo format.  */
34#include "gmo.h"
35
36#include "error.h"
37#include "xalloc.h"
38#include "binary-io.h"
39#include "exit.h"
40#include "message.h"
41#include "format.h"
42#include "gettext.h"
43
44#define _(str) gettext (str)
45
46
/* In-memory image of a .mo file.  The whole file is slurped into one
   buffer up front, which is cheaper than seeking around in it with
   lots of lseek() calls.  */
struct binary_mo_file
{
  /* File name, used in diagnostics.  */
  const char *filename;

  /* The file's contents and their length in bytes.  */
  char *data;
  size_t size;

  /* Byte order in which 32-bit numbers are stored in DATA.  */
  enum
    {
      MO_LITTLE_ENDIAN,
      MO_BIG_ENDIAN
    }
  endian;
};
56
57
58/* Read the contents of the given input stream.  */
59static void
60read_binary_mo_file (struct binary_mo_file *bfp,
61		     FILE *fp, const char *filename)
62{
63  char *buf = NULL;
64  size_t alloc = 0;
65  size_t size = 0;
66  size_t count;
67
68  while (!feof (fp))
69    {
70      const size_t increment = 4096;
71      if (size + increment > alloc)
72	{
73	  alloc = alloc + alloc / 2;
74	  if (alloc < size + increment)
75	    alloc = size + increment;
76	  buf = (char *) xrealloc (buf, alloc);
77	}
78      count = fread (buf + size, 1, increment, fp);
79      if (count == 0)
80	{
81	  if (ferror (fp))
82	    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
83		   filename);
84	}
85      else
86	size += count;
87    }
88  buf = (char *) xrealloc (buf, size);
89  bfp->filename = filename;
90  bfp->data = buf;
91  bfp->size = size;
92}
93
94/* Get a 32-bit number from the file, at the given file position.  */
95static nls_uint32
96get_uint32 (const struct binary_mo_file *bfp, size_t offset)
97{
98  nls_uint32 b0, b1, b2, b3;
99
100  if (offset + 4 > bfp->size)
101    error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
102
103  b0 = *(unsigned char *) (bfp->data + offset + 0);
104  b1 = *(unsigned char *) (bfp->data + offset + 1);
105  b2 = *(unsigned char *) (bfp->data + offset + 2);
106  b3 = *(unsigned char *) (bfp->data + offset + 3);
107  if (bfp->endian == MO_LITTLE_ENDIAN)
108    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
109  else
110    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
111}
112
113/* Get a static string from the file, at the given file position.  */
114static char *
115get_string (const struct binary_mo_file *bfp, size_t offset, size_t *lengthp)
116{
117  /* See 'struct string_desc'.  */
118  nls_uint32 s_length = get_uint32 (bfp, offset);
119  nls_uint32 s_offset = get_uint32 (bfp, offset + 4);
120
121  if (s_offset + s_length + 1 > bfp->size)
122    error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
123  if (bfp->data[s_offset + s_length] != '\0')
124    error (EXIT_FAILURE, 0,
125	   _("file \"%s\" contains a not NUL terminated string"),
126	   bfp->filename);
127
128  *lengthp = s_length + 1;
129  return bfp->data + s_offset;
130}
131
132/* Get a system dependent string from the file, at the given file position.  */
133static char *
134get_sysdep_string (const struct binary_mo_file *bfp, size_t offset,
135		   const struct mo_file_header *header, size_t *lengthp)
136{
137  /* See 'struct sysdep_string'.  */
138  size_t length;
139  char *string;
140  size_t i;
141  char *p;
142  nls_uint32 s_offset;
143
144  /* Compute the length.  */
145  length = 0;
146  for (i = 4; ; i += 8)
147    {
148      nls_uint32 segsize = get_uint32 (bfp, offset + i);
149      nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
150      nls_uint32 sysdep_segment_offset;
151      nls_uint32 ss_length;
152      nls_uint32 ss_offset;
153      size_t n;
154
155      length += segsize;
156
157      if (sysdepref == SEGMENTS_END)
158	break;
159      if (sysdepref >= header->n_sysdep_segments)
160	/* Invalid.  */
161	  error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
162		 bfp->filename);
163      /* See 'struct sysdep_segment'.  */
164      sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
165      ss_length = get_uint32 (bfp, sysdep_segment_offset);
166      ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
167      if (ss_offset + ss_length > bfp->size)
168	error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
169      if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0'))
170	{
171	  char location[30];
172	  sprintf (location, "sysdep_segment[%u]", (unsigned int) sysdepref);
173	  error (EXIT_FAILURE, 0,
174		 _("file \"%s\" contains a not NUL terminated string, at %s"),
175		 bfp->filename, location);
176	}
177      n = strlen (bfp->data + ss_offset);
178      length += (n > 1 ? 1 + n + 1 : n);
179    }
180
181  /* Allocate and fill the string.  */
182  string = (char *) xmalloc (length);
183  p = string;
184  s_offset = get_uint32 (bfp, offset);
185  for (i = 4; ; i += 8)
186    {
187      nls_uint32 segsize = get_uint32 (bfp, offset + i);
188      nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
189      nls_uint32 sysdep_segment_offset;
190      nls_uint32 ss_length;
191      nls_uint32 ss_offset;
192      size_t n;
193
194      if (s_offset + segsize > bfp->size)
195	error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
196      memcpy (p, bfp->data + s_offset, segsize);
197      p += segsize;
198      s_offset += segsize;
199
200      if (sysdepref == SEGMENTS_END)
201	break;
202      if (sysdepref >= header->n_sysdep_segments)
203	abort ();
204      /* See 'struct sysdep_segment'.  */
205      sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
206      ss_length = get_uint32 (bfp, sysdep_segment_offset);
207      ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
208      if (ss_offset + ss_length > bfp->size)
209	abort ();
210      if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0'))
211	abort ();
212      n = strlen (bfp->data + ss_offset);
213      if (n > 1)
214	*p++ = '<';
215      memcpy (p, bfp->data + ss_offset, n);
216      p += n;
217      if (n > 1)
218	*p++ = '>';
219    }
220
221  if (p != string + length)
222    abort ();
223
224  *lengthp = length;
225  return string;
226}
227
228/* Reads an existing .mo file and adds the messages to mlp.  */
229void
230read_mo_file (message_list_ty *mlp, const char *filename)
231{
232  FILE *fp;
233  struct binary_mo_file bf;
234  struct mo_file_header header;
235  unsigned int i;
236  static lex_pos_ty pos = { __FILE__, __LINE__ };
237
238  if (strcmp (filename, "-") == 0 || strcmp (filename, "/dev/stdin") == 0)
239    {
240      fp = stdin;
241      SET_BINARY (fileno (fp));
242    }
243  else
244    {
245      fp = fopen (filename, "rb");
246      if (fp == NULL)
247	error (EXIT_FAILURE, errno,
248	       _("error while opening \"%s\" for reading"), filename);
249    }
250
251  /* Read the file contents into memory.  */
252  read_binary_mo_file (&bf, fp, filename);
253
254  /* Get a 32-bit number from the file header.  */
255# define GET_HEADER_FIELD(field) \
256    get_uint32 (&bf, offsetof (struct mo_file_header, field))
257
258  /* We must grope the file to determine which endian it is.
259     Perversity of the universe tends towards maximum, so it will
260     probably not match the currently executing architecture.  */
261  bf.endian = MO_BIG_ENDIAN;
262  header.magic = GET_HEADER_FIELD (magic);
263  if (header.magic != _MAGIC)
264    {
265      bf.endian = MO_LITTLE_ENDIAN;
266      header.magic = GET_HEADER_FIELD (magic);
267      if (header.magic != _MAGIC)
268	{
269	unrecognised:
270	  error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
271		 filename);
272	}
273    }
274
275  header.revision = GET_HEADER_FIELD (revision);
276
277  /* We support only the major revisions 0 and 1.  */
278  switch (header.revision >> 16)
279    {
280    case 0:
281    case 1:
282      /* Fill the header parts that apply to major revisions 0 and 1.  */
283      header.nstrings = GET_HEADER_FIELD (nstrings);
284      header.orig_tab_offset = GET_HEADER_FIELD (orig_tab_offset);
285      header.trans_tab_offset = GET_HEADER_FIELD (trans_tab_offset);
286      header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size);
287      header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset);
288
289      for (i = 0; i < header.nstrings; i++)
290	{
291	  message_ty *mp;
292	  char *msgctxt;
293	  char *msgid;
294	  size_t msgid_len;
295	  char *separator;
296	  char *msgstr;
297	  size_t msgstr_len;
298
299	  /* Read the msgctxt and msgid.  */
300	  msgid = get_string (&bf, header.orig_tab_offset + i * 8,
301			      &msgid_len);
302	  /* Split into msgctxt and msgid.  */
303	  separator = strchr (msgid, MSGCTXT_SEPARATOR);
304	  if (separator != NULL)
305	    {
306	      /* The part before the MSGCTXT_SEPARATOR is the msgctxt.  */
307	      *separator = '\0';
308	      msgctxt = msgid;
309	      msgid = separator + 1;
310	      msgid_len -= msgid - msgctxt;
311	    }
312	  else
313	    msgctxt = NULL;
314
315	  /* Read the msgstr.  */
316	  msgstr = get_string (&bf, header.trans_tab_offset + i * 8,
317			       &msgstr_len);
318
319	  mp = message_alloc (msgctxt,
320			      msgid,
321			      (strlen (msgid) + 1 < msgid_len
322			       ? msgid + strlen (msgid) + 1
323			       : NULL),
324			      msgstr, msgstr_len,
325			      &pos);
326	  message_list_append (mlp, mp);
327	}
328
329      switch (header.revision & 0xffff)
330	{
331	case 0:
332	  break;
333	case 1:
334	default:
335	  /* Fill the header parts that apply to minor revision >= 1.  */
336	  header.n_sysdep_segments = GET_HEADER_FIELD (n_sysdep_segments);
337	  header.sysdep_segments_offset =
338	    GET_HEADER_FIELD (sysdep_segments_offset);
339	  header.n_sysdep_strings = GET_HEADER_FIELD (n_sysdep_strings);
340	  header.orig_sysdep_tab_offset =
341	    GET_HEADER_FIELD (orig_sysdep_tab_offset);
342	  header.trans_sysdep_tab_offset =
343	    GET_HEADER_FIELD (trans_sysdep_tab_offset);
344
345	  for (i = 0; i < header.n_sysdep_strings; i++)
346	    {
347	      message_ty *mp;
348	      char *msgctxt;
349	      char *msgid;
350	      size_t msgid_len;
351	      char *separator;
352	      char *msgstr;
353	      size_t msgstr_len;
354	      nls_uint32 offset;
355	      size_t f;
356
357	      /* Read the msgctxt and msgid.  */
358	      offset = get_uint32 (&bf, header.orig_sysdep_tab_offset + i * 4);
359	      msgid = get_sysdep_string (&bf, offset, &header, &msgid_len);
360	      /* Split into msgctxt and msgid.  */
361	      separator = strchr (msgid, MSGCTXT_SEPARATOR);
362	      if (separator != NULL)
363		{
364		  /* The part before the MSGCTXT_SEPARATOR is the msgctxt.  */
365		  *separator = '\0';
366		  msgctxt = msgid;
367		  msgid = separator + 1;
368		  msgid_len -= msgid - msgctxt;
369		}
370	      else
371		msgctxt = NULL;
372
373	      /* Read the msgstr.  */
374	      offset = get_uint32 (&bf, header.trans_sysdep_tab_offset + i * 4);
375	      msgstr = get_sysdep_string (&bf, offset, &header, &msgstr_len);
376
377	      mp = message_alloc (msgctxt,
378				  msgid,
379				  (strlen (msgid) + 1 < msgid_len
380				   ? msgid + strlen (msgid) + 1
381				   : NULL),
382				  msgstr, msgstr_len,
383				  &pos);
384
385	      /* Only messages with c-format or objc-format annotation are
386		 recognized as having system-dependent strings by msgfmt.
387		 Which one of the two, we don't know.  We have to guess,
388		 assuming that c-format is more probable than objc-format and
389		 that the .mo was likely produced by "msgfmt -c".  */
390	      for (f = format_c; ; f = format_objc)
391		{
392		  bool valid = true;
393		  struct formatstring_parser *parser = formatstring_parsers[f];
394		  const char *str_end;
395		  const char *str;
396
397		  str_end = msgid + msgid_len;
398		  for (str = msgid; str < str_end; str += strlen (str) + 1)
399		    {
400		      char *invalid_reason = NULL;
401		      void *descr = parser->parse (str, false, &invalid_reason);
402
403		      if (descr != NULL)
404			parser->free (descr);
405		      else
406			{
407			  free (invalid_reason);
408			  valid = false;
409			  break;
410			}
411		    }
412		  if (valid)
413		    {
414		      str_end = msgstr + msgstr_len;
415		      for (str = msgstr; str < str_end; str += strlen (str) + 1)
416			{
417			  char *invalid_reason = NULL;
418			  void *descr =
419			    parser->parse (str, true, &invalid_reason);
420
421			  if (descr != NULL)
422			    parser->free (descr);
423			  else
424			    {
425			      free (invalid_reason);
426			      valid = false;
427			      break;
428			    }
429			}
430		    }
431
432		  if (valid)
433		    {
434		      /* Found the most likely among c-format, objc-format.  */
435		      mp->is_format[f] = yes;
436		      break;
437		    }
438
439		  /* Try next f.  */
440		  if (f == format_objc)
441		    break;
442		}
443
444	      message_list_append (mlp, mp);
445	    }
446	  break;
447	}
448      break;
449
450    default:
451      goto unrecognised;
452    }
453
454  if (fp != stdin)
455    fclose (fp);
456}
457