1/* Writing Java .properties files.
2   Copyright (C) 2003, 2005-2006 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23/* Specification.  */
24#include "write-properties.h"
25
26#include <errno.h>
27#include <stdbool.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31
32#include "error.h"
33#include "message.h"
34#include "msgl-ascii.h"
35#include "msgl-iconv.h"
36#include "po-charset.h"
37#include "utf8-ucs4.h"
38#include "write-po.h"
39#include "xalloc.h"
40
41/* The format of the Java .properties files is documented in the JDK
42   documentation for class java.util.Properties.  In the case of .properties
43   files for PropertyResourceBundle, for each message, the msgid becomes the
44   key (left-hand side) and the msgstr becomes the value (right-hand side)
45   of a "key=value" line.  Messages with plurals are not supported in this
46   format.  */
47
48/* Handling of comments: We copy all comments from the PO file to the
49   .properties file. This is not really needed; it's a service for translators
50   who don't like PO files and prefer to maintain the .properties file.  */
51
/* Returns STRING rewritten in "JAVA" encoding: ASCII characters are kept
   as they are, and every other character is replaced by a \uxxxx escape
   (two such escapes, forming a UTF-16 surrogate pair, for characters
   beyond U+FFFF).  STRING is decoded with u8_mbtouc, i.e. it is expected
   to be UTF-8.
   If STRING consists only of ASCII characters, STRING itself is returned;
   otherwise the result is a freshly xmalloc()ed string.  */
static const char *
conv_to_java (const char *string)
{
  /* The conversion is done by hand rather than through iconv to "JAVA",
     because not every iconv() implementation knows that encoding.  */
  static const char hexdigit[] = "0123456789abcdef";
  const char *end;
  const char *in;
  size_t needed;
  char *result;
  char *out;

  if (is_ascii_string (string))
    return string;

  end = string + strlen (string);

  /* First pass: determine how many bytes the converted string occupies.  */
  needed = 0;
  for (in = string; in < end; )
    {
      unsigned int uc;

      in += u8_mbtouc (&uc, (const unsigned char *) in, end - in);
      if (uc <= 0x007f)
        needed += 1;            /* literal ASCII character */
      else if (uc < 0x10000)
        needed += 6;            /* one \uxxxx escape */
      else
        needed += 12;           /* two \uxxxx escapes */
    }

  result = (char *) xmalloc (needed + 1);

  /* Second pass: produce the converted string.  */
  out = result;
  for (in = string; in < end; )
    {
      unsigned int uc;
      unsigned int unit[2];
      size_t n_units;
      size_t k;

      in += u8_mbtouc (&uc, (const unsigned char *) in, end - in);

      if (uc <= 0x007f)
        {
          /* ASCII characters can be output literally.
             Non-ASCII ISO-8859-1 characters (0x0080..0x00FF) could be
             treated the same way, but there is no point in doing this;
             Sun's native2ascii doesn't do it either.  */
          *out++ = uc;
          continue;
        }

      if (uc < 0x10000)
        {
          /* Fits in a single UCS-2 'char'.  */
          unit[0] = uc;
          n_units = 1;
        }
      else
        {
          /* Needs a UTF-16 surrogate pair: two 'char's.  */
          unit[0] = 0xd800 + ((uc - 0x10000) >> 10);
          unit[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff);
          n_units = 2;
        }

      for (k = 0; k < n_units; k++)
        {
          *out++ = '\\';
          *out++ = 'u';
          *out++ = hexdigit[(unit[k] >> 12) & 0x0f];
          *out++ = hexdigit[(unit[k] >> 8) & 0x0f];
          *out++ = hexdigit[(unit[k] >> 4) & 0x0f];
          *out++ = hexdigit[unit[k] & 0x0f];
        }
    }
  *out = '\0';

  return result;
}
124
/* Writes STR, a key (IN_KEY true) or a value (IN_KEY false), to FP in
   .properties syntax, without a trailing newline.  STR is decoded with
   u8_mbtouc, i.e. it is expected to be UTF-8.
   Tab, newline, carriage return and form feed become \t, \n, \r and \f.
   A space is backslash-escaped when it is the first character, or anywhere
   in a key.  Backslash, '#', '!', '=' and ':' are always backslash-escaped.
   The remaining characters in the range 0x20..0x7e are written literally;
   everything else is written as \uxxxx escapes.  */
static void
write_escaped_string (FILE *fp, const char *str, bool in_key)
{
  static const char hexdigit[] = "0123456789abcdef";
  const char *const end = str + strlen (str);
  bool at_start = true;

  for (; str < end; at_start = false)
    {
      unsigned int uc;
      /* Character to emit after a backslash, or -1 if no backslash escape
         applies to this character.  */
      int escape = -1;

      str += u8_mbtouc (&uc, (const unsigned char *) str, end - str);

      switch (uc)
        {
        case 0x0020:
          /* Whitespace must be escaped where it would otherwise be
             skipped or would terminate the key.  */
          if (at_start || in_key)
            escape = ' ';
          break;
        case 0x0009:
          escape = 't';
          break;
        case 0x000a:
          escape = 'n';
          break;
        case 0x000d:
          escape = 'r';
          break;
        case 0x000c:
          escape = 'f';
          break;
        case '\\':              /* Backslash must be escaped.  */
        case '#':               /* Possible comment introducers.  */
        case '!':
        case '=':               /* Key terminators.  */
        case ':':
          escape = uc;
          break;
        default:
          break;
        }

      if (escape >= 0)
        {
          putc ('\\', fp);
          putc (escape, fp);
        }
      else if (uc >= 0x0020 && uc <= 0x007e)
        {
          /* ASCII characters can be output literally.
             Non-ASCII ISO-8859-1 characters (0x0080..0x00FF) could be
             treated the same way, but there is no point in doing this;
             Sun's native2ascii doesn't do it either.  */
          putc (uc, fp);
        }
      else
        {
          unsigned int unit[2];
          size_t n_units;
          size_t k;

          if (uc < 0x10000)
            {
              /* Fits in a single UCS-2 'char'.  */
              unit[0] = uc;
              n_units = 1;
            }
          else
            {
              /* Needs a UTF-16 surrogate pair: two 'char's.  */
              unit[0] = 0xd800 + ((uc - 0x10000) >> 10);
              unit[1] = 0xdc00 + ((uc - 0x10000) & 0x3ff);
              n_units = 2;
            }

          for (k = 0; k < n_units; k++)
            fprintf (fp, "\\u%c%c%c%c",
                     hexdigit[(unit[k] >> 12) & 0x0f],
                     hexdigit[(unit[k] >> 8) & 0x0f],
                     hexdigit[(unit[k] >> 4) & 0x0f],
                     hexdigit[unit[k] & 0x0f]);
        }
    }
}
203
204/* Writes a message to the file.  */
205static void
206write_message (FILE *fp, const message_ty *mp, size_t page_width, bool debug)
207{
208  /* Print translator comment if available.  */
209  message_print_comment (mp, fp);
210
211  /* Print xgettext extracted comments.  */
212  message_print_comment_dot (mp, fp);
213
214  /* Print the file position comments.  */
215  message_print_comment_filepos (mp, fp, false, page_width);
216
217  /* Print flag information in special comment.  */
218  message_print_comment_flags (mp, fp, debug);
219
220  /* Put a comment mark if the message is the header or untranslated or
221     fuzzy.  */
222  if (is_header (mp)
223      || mp->msgstr[0] == '\0'
224      || (mp->is_fuzzy && !is_header (mp)))
225    putc ('!', fp);
226
227  /* Now write the untranslated string and the translated string.  */
228  write_escaped_string (fp, mp->msgid, true);
229  putc ('=', fp);
230  write_escaped_string (fp, mp->msgstr, false);
231
232  putc ('\n', fp);
233}
234
235/* Writes an entire message list to the file.  */
236static void
237write_properties (FILE *fp, message_list_ty *mlp, const char *canon_encoding,
238		  size_t page_width, bool debug)
239{
240  bool blank_line;
241  size_t j, i;
242
243  /* Convert the messages to Unicode.  */
244  iconv_message_list (mlp, canon_encoding, po_charset_utf8, NULL);
245  for (j = 0; j < mlp->nitems; ++j)
246    {
247      message_ty *mp = mlp->item[j];
248
249      if (mp->comment != NULL)
250	for (i = 0; i < mp->comment->nitems; ++i)
251	  mp->comment->item[i] = conv_to_java (mp->comment->item[i]);
252      if (mp->comment_dot != NULL)
253	for (i = 0; i < mp->comment_dot->nitems; ++i)
254	  mp->comment_dot->item[i] = conv_to_java (mp->comment_dot->item[i]);
255    }
256
257  /* Loop through the messages.  */
258  blank_line = false;
259  for (j = 0; j < mlp->nitems; ++j)
260    {
261      const message_ty *mp = mlp->item[j];
262
263      if (mp->msgid_plural == NULL && !mp->obsolete)
264	{
265	  if (blank_line)
266	    putc ('\n', fp);
267
268	  write_message (fp, mp, page_width, debug);
269
270	  blank_line = true;
271	}
272    }
273}
274
275/* Output the contents of a PO file in Java .properties syntax.  */
276static void
277msgdomain_list_print_properties (msgdomain_list_ty *mdlp, FILE *fp,
278				 size_t page_width, bool debug)
279{
280  message_list_ty *mlp;
281
282  if (mdlp->nitems == 1)
283    mlp = mdlp->item[0]->messages;
284  else
285    mlp = message_list_alloc (false);
286  write_properties (fp, mlp, mdlp->encoding, page_width, debug);
287}
288
/* Describes a PO file in Java .properties syntax.
   This is the only object in this file with external linkage; its print
   function is msgdomain_list_print_properties above.
   The initializer is positional: the comment after each value names the
   member it is meant for.  NOTE(review): the declaration of
   struct catalog_output_format is not in this file (presumably it comes
   from "write-po.h"); keep the order here in sync with it.  */
const struct catalog_output_format output_format_properties =
{
  msgdomain_list_print_properties,	/* print */
  true,					/* requires_utf8 */
  false,				/* supports_multiple_domains */
  false,				/* supports_contexts */
  false,				/* supports_plurals */
  true,					/* alternative_is_po */
  true					/* alternative_is_java_class */
};
300