• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-tools/src/
1/* Writing Qt .qm files.
2   Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22/* Specification.  */
23#include "write-qt.h"
24
25#include <assert.h>
26#include <errno.h>
27#include <stdbool.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31
32#include "error.h"
33#include "xerror.h"
34#include "message.h"
35#include "po-charset.h"
36#include "msgl-iconv.h"
37#include "hash-string.h"
38#include "unistr.h"
39#include "xalloc.h"
40#include "obstack.h"
41#include "hash.h"
42#include "binary-io.h"
43#include "fwriteerror.h"
44#include "gettext.h"
45
46#define _(str) gettext (str)
47
48/* Qt .qm files are read by the QTranslator::load() function and written
49   by the Qt QTranslator::save() function.
50
51   The Qt tool 'msg2qm' uses the latter function and can convert PO files
52   to .qm files. But since 'msg2qm' is marked as an "old" tool in Qt 3.0.5's
53   i18n.html documentation and therefore likely to disappear, we provide the
54   same functionality here.
55
56   The format of .qm files, as reverse engineered from the functions
57     QTranslator::save(const QString& filename, SaveMode mode)
58     QTranslator::squeeze(SaveMode mode)
59     QTranslatorMessage::write(QDataStream& stream, bool strip, Prefix prefix)
60     elfHash(const char* name)
61   in qt-3.0.5, is as follows:
62
63     It's a binary data format. Elements are u8 (byte), u16, u32. They are
64     written in big-endian order.
65
66     The file starts with a magic string of 16 bytes:
67       3C B8 64 18 CA EF 9C 95 CD 21 1C BF 60 A1 BD DD
68
69     Then come three sections. Each of the three sections is optional. Each
70     has this structure:
71       struct {
72         u8 section_type; // 0x42 = hashes, 0x69 = messages, 0x2f = contexts
73         u32 length; // number of bytes of the data
74         u8 data[length];
75       };
76
77     In the first section, the hashes section, the data has the following
78     structure:
79       It's a sorted array of
80         struct {
81           u32 hashcode; // elfHash of the concatenation of msgid and
82                         // disambiguating-comment
83           u32 offset; // offset within the data[] of the messages section
84         };
      It's sorted in ascending order by hashcode as the primary sorting
      criterion and - when the hashcodes are the same - by offset as the
      secondary criterion.
87
88     In the second section, the messages section, the data has the following
89     structure:
90       It's a sequence of records, each representing a message, in no
91       particular order. Each record is a sequence of subsections, each
92       introduced by a particular subsection tag. The possible subsection tags
93       are (and they usually occur in this order):
94         - 03: Translation. Followed by the msgstr in UCS-2 or UTF-16 format:
95               struct {
96                 u32 length;
97                 u16 chars[length/2];
98               };
99         - 08: Disambiguating-comment. Followed by the NUL-terminated,
100               ISO-8859-1 encoded, disambiguating-comment string:
101               struct {
102                 u32 length;    // number of bytes including the NUL at the end
103                 u8 chars[length];
104               };
105         - 06: SourceText, i.e. msgid. Followed by the NUL-terminated,
106               ISO-8859-1 encoded, msgid:
107               struct {
108                 u32 length;    // number of bytes including the NUL at the end
109                 u8 chars[length];
110               };
111         - 02: SourceText16, i.e. msgid. Encoded as UCS-2, but must actually
112               be ISO-8859-1.
113               struct {
114                 u32 length;
115                 u16 chars[length/2];
116               };
117               This subsection tag is obsoleted by SourceText.
118         - 07: Context. Followed by the NUL-terminated, ISO-8859-1 encoded,
119               context string (usually a C++ class name or empty):
120               struct {
121                 u32 length;    // number of bytes including the NUL at the end
122                 u8 chars[length];
123               };
124         - 04: Context16. Encoded as UCS-2, but must actually be ISO-8859-1.
125               struct {
126                 u32 length;
127                 u16 chars[length/2];
128               };
129               This subsection tag is obsoleted by Context.
130         - 05: Hash. Followed by
131               struct {
132                 u32 hashcode; // elfHash of the concatenation of msgid and
133                               // disambiguating-comment
134               };
135         - 01: End. Designates the end of the record. No further data.
136       Usually the following subsections are written, but some of them are
137       optional:
138         - 03: Translation.
139         - 08: Disambiguating-comment (optional).
140         - 06: SourceText (optional).
141         - 07: Context (optional).
142         - 05: Hash.
143         - 01: End.
144       A subsection can be omitted if the value to be output is the same as
145       for the previous record.
146
147     The third section, the contexts section, contains the set of all occurring
148     context strings. This section is optional; it is used to speed up the
149     search. The data is a hash table with the following structure:
150       struct {
151         u16 table_size;
152         u16 buckets[table_size];
153         u8 pool[...];
154       };
155     pool[...] contains:
156       u16 zero;
      for i = 0, ..., table_size - 1:
158         if there are context strings with elfHash(context)%table_size == i:
159           for all context strings with elfHash(context)%table_size == i:
160             len := min(length(context),255); // truncated to length 255
161             struct {
162               u8 len;
163               u8 chars[len];
164             };
165           struct {
166             u8 zero[1]; // signals the end of this bucket
167             u8 padding[0 or 1]; // padding for even number of bytes
168           };
169     buckets[i] is 0 for an empty bucket, or the offset in pool[] where
170     the context strings for this bucket start, divided by 2.
171     This context section must not be used
172       - if the empty context is used, or
173       - if a context of length > 255 is used, or
174       - if the context pool's size would be > 2^17.
175
176     The elfHash function is the same as our hash_string function, except that
177     at the end it maps a hash code of 0x00000000 to 0x00000001.
178
179   When we convert from PO file format, all disambiguating-comments and
180   contexts are empty, and therefore the contexts section can be omitted.  */
181
182
/* Emit VALUE as one byte on OUTPUT_FILE.
   The result of the write is not inspected here.  */
static inline void
write_u8 (FILE *output_file, unsigned char value)
{
  fputc (value, output_file);
}
189
/* Emit the low 16 bits of VALUE on OUTPUT_FILE as two bytes, most
   significant byte first.  The result of the write is not inspected here.  */
static inline void
write_u16 (FILE *output_file, unsigned short value)
{
  unsigned char bytes[2];

  bytes[1] = value & 0xff;
  bytes[0] = (value >> 8) & 0xff;

  fwrite (bytes, 1, sizeof bytes, output_file);
}
201
/* Emit the low 32 bits of VALUE on OUTPUT_FILE as four bytes, most
   significant byte first.  The result of the write is not inspected here.  */
static inline void
write_u32 (FILE *output_file, unsigned int value)
{
  unsigned char bytes[4];
  int i;

  /* Fill the buffer back to front, peeling off the lowest byte each time.  */
  for (i = 3; i >= 0; i--)
    {
      bytes[i] = value & 0xff;
      value >>= 8;
    }

  fwrite (bytes, 1, sizeof bytes, output_file);
}
215
216
217#define obstack_chunk_alloc xmalloc
218#define obstack_chunk_free free
219
/* Append VALUE as one byte to the object being grown in MEMPOOL.  */
static void
append_u8 (struct obstack *mempool, unsigned char value)
{
  obstack_grow (mempool, &value, 1);
}
230
/* Append the low 16 bits of VALUE to the object being grown in MEMPOOL,
   as two bytes with the most significant byte first.  */
static void
append_u16 (struct obstack *mempool, unsigned short value)
{
  unsigned char bytes[2];

  bytes[1] = value & 0xff;
  bytes[0] = (value >> 8) & 0xff;

  obstack_grow (mempool, bytes, 2);
}
242
/* Append the low 32 bits of VALUE to the object being grown in MEMPOOL,
   as four bytes with the most significant byte first.  */
static void
append_u32 (struct obstack *mempool, unsigned int value)
{
  unsigned char bytes[4];
  int i;

  /* Fill the buffer back to front, peeling off the lowest byte each time.  */
  for (i = 3; i >= 0; i--)
    {
      bytes[i] = value & 0xff;
      value >>= 8;
    }

  obstack_grow (mempool, bytes, 4);
}
256
/* Append the NUL-terminated byte string STRING (ISO-8859-1 encoded) to
   MEMPOOL: first a u32 byte count that includes the terminating NUL, then
   the bytes themselves, NUL included.  */
static void
append_base_string (struct obstack *mempool, const char *string)
{
  size_t size_with_nul = strlen (string) + 1;

  append_u32 (mempool, size_with_nul);
  obstack_grow (mempool, string, size_with_nul);
}
265
/* Append the LENGTH 16-bit units at STRING (UTF-16 encoded) to MEMPOOL:
   first a u32 holding the size in bytes (2 * LENGTH), then each unit as a
   big-endian u16.  No terminator is appended.  */
static void
append_unicode_string (struct obstack *mempool, const unsigned short *string,
                       size_t length)
{
  size_t i;

  append_u32 (mempool, length * 2);
  for (i = 0; i < length; i++)
    append_u16 (mempool, string[i]);
}
275
/* Retrieve a 4-byte big-endian integer from the memory at P.
   P must point to at least 4 readable bytes.
   Each byte is converted to 'unsigned int' before shifting: an
   'unsigned char' operand would otherwise be promoted to (signed) 'int',
   and shifting a byte >= 0x80 left by 24 would overflow it.  */
static inline unsigned int
peek_u32 (const unsigned char *p)
{
  return ((unsigned int) p[0] << 24)
         | ((unsigned int) p[1] << 16)
         | ((unsigned int) p[2] << 8)
         | (unsigned int) p[3];
}
282
283/* Convert an UTF-8 string to ISO-8859-1, without error checking.  */
284static char *
285conv_to_iso_8859_1 (const char *string)
286{
287  size_t length = strlen (string);
288  const char *str = string;
289  const char *str_limit = string + length;
290  /* Conversion to ISO-8859-1 can only reduce the number of bytes.  */
291  char *result = XNMALLOC (length + 1, char);
292  char *q = result;
293
294  while (str < str_limit)
295    {
296      unsigned int uc;
297      str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
298      /* It has already been verified that the string fits in ISO-8859-1.  */
299      if (!(uc < 0x100))
300	abort ();
301      /* Store as ISO-8859-1.  */
302      *q++ = (unsigned char) uc;
303    }
304  *q = '\0';
305  assert (q - result <= length);
306
307  return result;
308}
309
310/* Convert an UTF-8 string to UTF-16, returning its size (number of UTF-16
311   codepoints) in *SIZEP.  */
312static unsigned short *
313conv_to_utf16 (const char *string, size_t *sizep)
314{
315  size_t length = strlen (string);
316  const char *str = string;
317  const char *str_limit = string + length;
318  /* Conversion to UTF-16 can at most double the number of bytes.  */
319  unsigned short *result = XNMALLOC (length, unsigned short);
320  unsigned short *q = result;
321
322  while (str < str_limit)
323    {
324      unsigned int uc;
325      str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
326      if (uc < 0x10000)
327	/* UCS-2 character.  */
328	*q++ = (unsigned short) uc;
329      else
330	{
331	  /* UTF-16 surrogate.  */
332	  *q++ = 0xd800 + ((uc - 0x10000) >> 10);
333	  *q++ = 0xdc00 + ((uc - 0x10000) & 0x3ff);
334	}
335    }
336  assert (q - result <= 2 * length);
337
338  *sizep = q - result;
339  return result;
340}
341
/* Return the Qt hash code of the NUL-terminated string STR: the value of
   hash_string, except that a result of 0 is replaced by 1.  */
static unsigned int
string_hashcode (const char *str)
{
  unsigned int h = hash_string (str);

  return (h != 0 ? h : 1);
}
353
/* qsort comparison function for two 8-byte entries of the hashes section.
   Each entry is a big-endian u32 hashcode followed by a big-endian u32
   offset.  Entries are ordered by hashcode first, and by offset when the
   hashcodes are equal.  Returns -1, 0 or 1.  */
static int
cmp_hashes (const void *va, const void *vb)
{
  const unsigned char *entry_a = va;
  const unsigned char *entry_b = vb;
  unsigned int key_a = peek_u32 (entry_a);
  unsigned int key_b = peek_u32 (entry_b);

  if (key_a == key_b)
    {
      /* Same hashcode: fall back to comparing the offsets.  */
      key_a = peek_u32 (entry_a + 4);
      key_b = peek_u32 (entry_b + 4);
    }

  if (key_a == key_b)
    return 0;
  return (key_a < key_b ? -1 : 1);
}
376
377
/* Write one section to OUTPUT_FILE: the one-byte TAG, the u32 SIZE, and
   then the SIZE bytes at DATA.  Nothing at all is written when SIZE is 0,
   since an empty section can be omitted.  */
static void
write_section (FILE *output_file, unsigned char tag, void *data, size_t size)
{
  if (size == 0)
    return;

  write_u8 (output_file, tag);
  write_u32 (output_file, size);
  fwrite (data, size, 1, output_file);
}
390
391
392/* Write an entire .qm file.  */
393static void
394write_qm (FILE *output_file, message_list_ty *mlp)
395{
396  static unsigned char magic[16] =
397    {
398      0x3C, 0xB8, 0x64, 0x18, 0xCA, 0xEF, 0x9C, 0x95,
399      0xCD, 0x21, 0x1C, 0xBF, 0x60, 0xA1, 0xBD, 0xDD
400    };
401  struct obstack hashes_pool;
402  struct obstack messages_pool;
403  size_t j;
404
405  obstack_init (&hashes_pool);
406  obstack_init (&messages_pool);
407
408  /* Prepare the hashes section and the messages section.  */
409  for (j = 0; j < mlp->nitems; j++)
410    {
411      message_ty *mp = mlp->item[j];
412
413      /* No need to emit the header entry, it's not needed at runtime.  */
414      if (!is_header (mp))
415	{
416	  char *msgctxt_as_iso_8859_1 =
417	    conv_to_iso_8859_1 (mp->msgctxt != NULL ? mp->msgctxt : "");
418	  char *msgid_as_iso_8859_1 = conv_to_iso_8859_1 (mp->msgid);
419	  size_t msgstr_len;
420	  unsigned short *msgstr_as_utf16 =
421	    conv_to_utf16 (mp->msgstr, &msgstr_len);
422	  unsigned int hashcode = string_hashcode (msgid_as_iso_8859_1);
423	  unsigned int offset = obstack_object_size (&messages_pool);
424
425	  /* Add a record to the hashes section.  */
426	  append_u32 (&hashes_pool, hashcode);
427	  append_u32 (&hashes_pool, offset);
428
429	  /* Add a record to the messages section.  */
430
431	  append_u8 (&messages_pool, 0x03);
432	  append_unicode_string (&messages_pool, msgstr_as_utf16, msgstr_len);
433
434	  append_u8 (&messages_pool, 0x08);
435	  append_base_string (&messages_pool, "");
436
437	  append_u8 (&messages_pool, 0x06);
438	  append_base_string (&messages_pool, msgid_as_iso_8859_1);
439
440	  append_u8 (&messages_pool, 0x07);
441	  append_base_string (&messages_pool, msgctxt_as_iso_8859_1);
442
443	  append_u8 (&messages_pool, 0x05);
444	  append_u32 (&messages_pool, hashcode);
445
446	  append_u8 (&messages_pool, 0x01);
447
448	  free (msgstr_as_utf16);
449	  free (msgid_as_iso_8859_1);
450	  free (msgctxt_as_iso_8859_1);
451	}
452    }
453
454  /* Sort the hashes section.  */
455  {
456    size_t nstrings = obstack_object_size (&hashes_pool) / 8;
457    if (nstrings > 0)
458      qsort (obstack_base (&hashes_pool), nstrings, 8, cmp_hashes);
459  }
460
461  /* Write the magic number.  */
462  fwrite (magic, sizeof (magic), 1, output_file);
463
464  /* Write the hashes section.  */
465  write_section (output_file, 0x42, obstack_base (&hashes_pool),
466		 obstack_object_size (&hashes_pool));
467
468  /* Write the messages section.  */
469  write_section (output_file, 0x69, obstack_base (&messages_pool),
470		 obstack_object_size (&messages_pool));
471
472  /* Decide whether to write a contexts section.  */
473  {
474    bool can_write_contexts = true;
475
476    for (j = 0; j < mlp->nitems; j++)
477      {
478	message_ty *mp = mlp->item[j];
479
480	if (!is_header (mp))
481	  if (mp->msgctxt == NULL || mp->msgctxt[0] == '\0'
482	      || strlen (mp->msgctxt) > 255)
483	    {
484	      can_write_contexts = false;
485	      break;
486	    }
487      }
488
489    if (can_write_contexts)
490      {
491	hash_table all_contexts;
492	size_t num_contexts;
493	unsigned long table_size;
494
495	/* Collect the contexts, removing duplicates.  */
496	hash_init (&all_contexts, 10);
497	for (j = 0; j < mlp->nitems; j++)
498	  {
499	    message_ty *mp = mlp->item[j];
500
501	    if (!is_header (mp))
502	      hash_insert_entry (&all_contexts,
503				 mp->msgctxt, strlen (mp->msgctxt) + 1,
504				 NULL);
505	  }
506
507	/* Compute the number of different contexts.  */
508	num_contexts = all_contexts.size;
509
510	/* Compute a suitable hash table size.  */
511	table_size = next_prime (num_contexts * 1.7);
512	if (table_size >= 0x10000)
513	  table_size = 65521;
514
515	/* Put the contexts into a hash table of size table_size.  */
516	{
517	  struct list_cell { const char *context; struct list_cell *next; };
518	  struct list_cell *list_memory =
519	    XNMALLOC (table_size, struct list_cell);
520	  struct list_cell *freelist;
521	  struct bucket { struct list_cell *head; struct list_cell **tail; };
522	  struct bucket *buckets = XNMALLOC (table_size, struct bucket);
523	  size_t i;
524
525	  freelist = list_memory;
526
527	  for (i = 0; i < table_size; i++)
528	    {
529	      buckets[i].head = NULL;
530	      buckets[i].tail = &buckets[i].head;
531	    }
532
533	  {
534	    void *iter;
535	    const void *key;
536	    size_t keylen;
537	    void *null;
538
539	    iter = NULL;
540	    while (hash_iterate (&all_contexts, &iter, &key, &keylen, &null)
541		   == 0)
542	      {
543		const char *context = (const char *)key;
544		i = string_hashcode (context) % table_size;
545		freelist->context = context;
546		freelist->next = NULL;
547		*buckets[i].tail = freelist;
548		buckets[i].tail = &freelist->next;
549		freelist++;
550	      }
551	  }
552
553	  /* Determine the total context pool size.  */
554	  {
555	    size_t pool_size;
556
557	    pool_size = 2;
558	    for (i = 0; i < table_size; i++)
559	      if (buckets[i].head != NULL)
560		{
561		  const struct list_cell *p;
562
563		  for (p = buckets[i].head; p != NULL; p = p->next)
564		    pool_size += 1 + strlen (p->context);
565		  pool_size++;
566		  if ((pool_size % 2) != 0)
567		    pool_size++;
568		}
569	    if (pool_size <= 0x20000)
570	      {
571		/* Prepare the contexts section.  */
572		struct obstack contexts_pool;
573		size_t pool_offset;
574
575		obstack_init (&contexts_pool);
576
577		append_u16 (&contexts_pool, table_size);
578		pool_offset = 2;
579		for (i = 0; i < table_size; i++)
580		  if (buckets[i].head != NULL)
581		    {
582		      const struct list_cell *p;
583
584		      append_u16 (&contexts_pool, pool_offset / 2);
585		      for (p = buckets[i].head; p != NULL; p = p->next)
586			pool_offset += 1 + strlen (p->context);
587		      pool_offset++;
588		      if ((pool_offset % 2) != 0)
589			pool_offset++;
590		    }
591		  else
592		    append_u16 (&contexts_pool, 0);
593		if (!(pool_offset == pool_size))
594		  abort ();
595
596		append_u16 (&contexts_pool, 0);
597		pool_offset = 2;
598		for (i = 0; i < table_size; i++)
599		  if (buckets[i].head != NULL)
600		    {
601		      const struct list_cell *p;
602
603		      for (p = buckets[i].head; p != NULL; p = p->next)
604			{
605			  append_u8 (&contexts_pool, strlen (p->context));
606			  obstack_grow (&contexts_pool,
607					p->context, strlen (p->context));
608			  pool_offset += 1 + strlen (p->context);
609			}
610		      append_u8 (&contexts_pool, 0);
611		      pool_offset++;
612		      if ((pool_offset % 2) != 0)
613			{
614			  append_u8 (&contexts_pool, 0);
615			  pool_offset++;
616			}
617		    }
618		if (!(pool_offset == pool_size))
619		  abort ();
620
621		if (!(obstack_object_size (&contexts_pool)
622		      == 2 + 2 * table_size + pool_size))
623		  abort ();
624
625		/* Write the contexts section.  */
626		write_section (output_file, 0x2f, obstack_base (&contexts_pool),
627			       obstack_object_size (&contexts_pool));
628
629		obstack_free (&contexts_pool, NULL);
630	      }
631	  }
632
633	  free (buckets);
634	  free (list_memory);
635	}
636
637	hash_destroy (&all_contexts);
638      }
639  }
640
641  obstack_free (&messages_pool, NULL);
642  obstack_free (&hashes_pool, NULL);
643}
644
645
646int
647msgdomain_write_qt (message_list_ty *mlp, const char *canon_encoding,
648		    const char *domain_name, const char *file_name)
649{
650  FILE *output_file;
651
652  /* If no entry for this domain don't even create the file.  */
653  if (mlp->nitems != 0)
654    {
655      /* Determine whether mlp has plural entries.  */
656      {
657	bool has_plural;
658	size_t j;
659
660	has_plural = false;
661	for (j = 0; j < mlp->nitems; j++)
662	  if (mlp->item[j]->msgid_plural != NULL)
663	    has_plural = true;
664	if (has_plural)
665	  {
666	    multiline_error (xstrdup (""),
667			     xstrdup (_("\
668message catalog has plural form translations\n\
669but the Qt message catalog format doesn't support plural handling\n")));
670	    return 1;
671	  }
672      }
673
674      /* Convert the messages to Unicode.  */
675      iconv_message_list (mlp, canon_encoding, po_charset_utf8, NULL);
676
677      /* Determine whether mlp has non-ISO-8859-1 msgctxt entries.  */
678      {
679	size_t j;
680
681	for (j = 0; j < mlp->nitems; j++)
682	  {
683	    const char *string = mlp->item[j]->msgctxt;
684
685	    if (string != NULL)
686	      {
687		/* An UTF-8 encoded string fits in ISO-8859-1 if and only if
688		   all its bytes are < 0xc4.  */
689		for (; *string; string++)
690		  if ((unsigned char) *string >= 0xc4)
691		    {
692		      multiline_error (xstrdup (""),
693				       xstrdup (_("\
694message catalog has msgctxt strings containing characters outside ISO-8859-1\n\
695but the Qt message catalog format supports Unicode only in the translated\n\
696strings, not in the context strings\n")));
697		      return 1;
698		    }
699	      }
700	  }
701      }
702
703      /* Determine whether mlp has non-ISO-8859-1 msgid entries.  */
704      {
705	size_t j;
706
707	for (j = 0; j < mlp->nitems; j++)
708	  {
709	    const char *string = mlp->item[j]->msgid;
710
711	    /* An UTF-8 encoded string fits in ISO-8859-1 if and only if all
712	       its bytes are < 0xc4.  */
713	    for (; *string; string++)
714	      if ((unsigned char) *string >= 0xc4)
715		{
716		  multiline_error (xstrdup (""),
717				   xstrdup (_("\
718message catalog has msgid strings containing characters outside ISO-8859-1\n\
719but the Qt message catalog format supports Unicode only in the translated\n\
720strings, not in the untranslated strings\n")));
721		  return 1;
722		}
723	  }
724      }
725
726      if (strcmp (domain_name, "-") == 0)
727	{
728	  output_file = stdout;
729	  SET_BINARY (fileno (output_file));
730	}
731      else
732	{
733	  output_file = fopen (file_name, "wb");
734	  if (output_file == NULL)
735	    {
736	      error (0, errno, _("error while opening \"%s\" for writing"),
737		     file_name);
738	      return 1;
739	    }
740	}
741
742      if (output_file != NULL)
743	{
744	  write_qm (output_file, mlp);
745
746	  /* Make sure nothing went wrong.  */
747	  if (fwriteerror (output_file))
748	    error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),
749		   file_name);
750	}
751    }
752
753  return 0;
754}
755