• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-tools/src/
/* Message list charset and locale charset handling.
   Copyright (C) 2001-2003, 2005-2007 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18
19#ifdef HAVE_CONFIG_H
20# include "config.h"
21#endif
22#include <alloca.h>
23
24/* Specification.  */
25#include "msgl-iconv.h"
26
27#include <stdbool.h>
28#include <stdlib.h>
29#include <string.h>
30
31#if HAVE_ICONV
32# include <iconv.h>
33#endif
34
35#include "progname.h"
36#include "basename.h"
37#include "message.h"
38#include "po-charset.h"
39#include "xstriconv.h"
40#include "msgl-ascii.h"
41#include "xalloc.h"
42#include "xmalloca.h"
43#include "c-strstr.h"
44#include "xvasprintf.h"
45#include "po-xerror.h"
46#include "gettext.h"
47
48#define _(str) gettext (str)
49
50
51#if HAVE_ICONV
52
53static void conversion_error (const struct conversion_context* context)
54#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
55     __attribute__ ((noreturn))
56#endif
57;
58static void
59conversion_error (const struct conversion_context* context)
60{
61  if (context->to_code == po_charset_utf8)
62    /* If a conversion to UTF-8 fails, the problem lies in the input.  */
63    po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
64	       xasprintf (_("%s: input is not valid in \"%s\" encoding"),
65			  context->from_filename, context->from_code));
66  else
67    po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
68	       xasprintf (_("\
69%s: error while converting from \"%s\" encoding to \"%s\" encoding"),
70			  context->from_filename, context->from_code,
71			  context->to_code));
72  /* NOTREACHED */
73  abort ();
74}
75
/* Convert the NUL-terminated STRING through the conversion descriptor CD.
   The terminating NUL is converted along with the text.  On success the
   buffer produced by xmem_cd_iconv is returned.  If the conversion fails,
   or the output does not contain exactly one NUL byte located at its very
   end, the failure is reported through conversion_error (CONTEXT).  */
char *
convert_string (iconv_t cd, const char *string,
                const struct conversion_context* context)
{
  char *converted = NULL;
  size_t converted_size = 0;
  bool ok;

  ok = (xmem_cd_iconv (string, strlen (string) + 1, cd,
                       &converted, &converted_size) == 0);

  /* The output must end in a NUL and must not contain an earlier one.  */
  if (ok)
    ok = (converted_size > 0
          && converted[converted_size - 1] == '\0'
          && strlen (converted) == converted_size - 1);

  if (!ok)
    {
      conversion_error (context);
      /* NOTREACHED */
      return NULL;
    }

  return converted;
}
94
95static void
96convert_string_list (iconv_t cd, string_list_ty *slp,
97		     const struct conversion_context* context)
98{
99  size_t i;
100
101  if (slp != NULL)
102    for (i = 0; i < slp->nitems; i++)
103      slp->item[i] = convert_string (cd, slp->item[i], context);
104}
105
106static void
107convert_prev_msgid (iconv_t cd, message_ty *mp,
108		    const struct conversion_context* context)
109{
110  if (mp->prev_msgctxt != NULL)
111    mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context);
112  if (mp->prev_msgid != NULL)
113    mp->prev_msgid = convert_string (cd, mp->prev_msgid, context);
114  if (mp->prev_msgid_plural != NULL)
115    mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context);
116}
117
118static void
119convert_msgid (iconv_t cd, message_ty *mp,
120	       const struct conversion_context* context)
121{
122  if (mp->msgctxt != NULL)
123    mp->msgctxt = convert_string (cd, mp->msgctxt, context);
124  mp->msgid = convert_string (cd, mp->msgid, context);
125  if (mp->msgid_plural != NULL)
126    mp->msgid_plural = convert_string (cd, mp->msgid_plural, context);
127}
128
129static void
130convert_msgstr (iconv_t cd, message_ty *mp,
131		const struct conversion_context* context)
132{
133  char *result = NULL;
134  size_t resultlen = 0;
135
136  if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
137    abort ();
138
139  if (xmem_cd_iconv (mp->msgstr, mp->msgstr_len, cd, &result, &resultlen) == 0)
140    /* Verify the result has a NUL byte at the end.  */
141    if (resultlen > 0 && result[resultlen - 1] == '\0')
142      /* Verify the result has the same number of NUL bytes.  */
143      {
144	const char *p;
145	const char *pend;
146	int nulcount1;
147	int nulcount2;
148
149	for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
150	     p < pend;
151	     p += strlen (p) + 1, nulcount1++);
152	for (p = result, pend = p + resultlen, nulcount2 = 0;
153	     p < pend;
154	     p += strlen (p) + 1, nulcount2++);
155
156	if (nulcount1 == nulcount2)
157	  {
158	    mp->msgstr = result;
159	    mp->msgstr_len = resultlen;
160	    return;
161	  }
162      }
163
164  conversion_error (context);
165}
166
167#endif
168
169
170static bool
171iconv_message_list_internal (message_list_ty *mlp,
172			     const char *canon_from_code,
173			     const char *canon_to_code,
174			     bool update_header,
175			     const char *from_filename)
176{
177  bool canon_from_code_overridden = (canon_from_code != NULL);
178  bool msgids_changed;
179  size_t j;
180
181  /* If the list is empty, nothing to do.  */
182  if (mlp->nitems == 0)
183    return false;
184
185  /* Search the header entry, and extract and replace the charset name.  */
186  for (j = 0; j < mlp->nitems; j++)
187    if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
188      {
189	const char *header = mlp->item[j]->msgstr;
190
191	if (header != NULL)
192	  {
193	    const char *charsetstr = c_strstr (header, "charset=");
194
195	    if (charsetstr != NULL)
196	      {
197		size_t len;
198		char *charset;
199		const char *canon_charset;
200
201		charsetstr += strlen ("charset=");
202		len = strcspn (charsetstr, " \t\n");
203		charset = (char *) xmalloca (len + 1);
204		memcpy (charset, charsetstr, len);
205		charset[len] = '\0';
206
207		canon_charset = po_charset_canonicalize (charset);
208		if (canon_charset == NULL)
209		  {
210		    if (!canon_from_code_overridden)
211		      {
212			/* Don't give an error for POT files, because POT
213			   files usually contain only ASCII msgids.  */
214			const char *filename = from_filename;
215			size_t filenamelen;
216
217			if (filename != NULL
218			    && (filenamelen = strlen (filename)) >= 4
219			    && memcmp (filename + filenamelen - 4, ".pot", 4)
220			       == 0
221			    && strcmp (charset, "CHARSET") == 0)
222			  canon_charset = po_charset_ascii;
223			else
224			  po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
225				     false, xasprintf (_("\
226present charset \"%s\" is not a portable encoding name"),
227						charset));
228		      }
229		  }
230		else
231		  {
232		    if (canon_from_code == NULL)
233		      canon_from_code = canon_charset;
234		    else if (canon_from_code != canon_charset)
235		      po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0,  0,
236				 false,
237				 xasprintf (_("\
238two different charsets \"%s\" and \"%s\" in input file"),
239					    canon_from_code, canon_charset));
240		  }
241		freea (charset);
242
243		if (update_header)
244		  {
245		    size_t len1, len2, len3;
246		    char *new_header;
247
248		    len1 = charsetstr - header;
249		    len2 = strlen (canon_to_code);
250		    len3 = (header + strlen (header)) - (charsetstr + len);
251		    new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
252		    memcpy (new_header, header, len1);
253		    memcpy (new_header + len1, canon_to_code, len2);
254		    memcpy (new_header + len1 + len2, charsetstr + len,
255			    len3 + 1);
256		    mlp->item[j]->msgstr = new_header;
257		    mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
258		  }
259	      }
260	  }
261      }
262  if (canon_from_code == NULL)
263    {
264      if (is_ascii_message_list (mlp))
265	canon_from_code = po_charset_ascii;
266      else
267	po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
268		   _("\
269input file doesn't contain a header entry with a charset specification"));
270    }
271
272  msgids_changed = false;
273
274  /* If the two encodings are the same, nothing to do.  */
275  if (canon_from_code != canon_to_code)
276    {
277#if HAVE_ICONV
278      iconv_t cd;
279      struct conversion_context context;
280
281      /* Avoid glibc-2.1 bug with EUC-KR.  */
282# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
283      if (strcmp (canon_from_code, "EUC-KR") == 0)
284	cd = (iconv_t)(-1);
285      else
286# endif
287      cd = iconv_open (canon_to_code, canon_from_code);
288      if (cd == (iconv_t)(-1))
289	po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
290		   xasprintf (_("\
291Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
292and iconv() does not support this conversion."),
293			      canon_from_code, canon_to_code,
294			      basename (program_name)));
295
296      context.from_code = canon_from_code;
297      context.to_code = canon_to_code;
298      context.from_filename = from_filename;
299
300      for (j = 0; j < mlp->nitems; j++)
301	{
302	  message_ty *mp = mlp->item[j];
303
304	  if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt))
305	      || !is_ascii_string (mp->msgid))
306	    msgids_changed = true;
307	  context.message = mp;
308	  convert_string_list (cd, mp->comment, &context);
309	  convert_string_list (cd, mp->comment_dot, &context);
310	  convert_prev_msgid (cd, mp, &context);
311	  convert_msgid (cd, mp, &context);
312	  convert_msgstr (cd, mp, &context);
313	}
314
315      iconv_close (cd);
316
317      if (msgids_changed)
318	if (message_list_msgids_changed (mlp))
319	  po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
320		     xasprintf (_("\
321Conversion from \"%s\" to \"%s\" introduces duplicates: \
322some different msgids become equal."),
323				canon_from_code, canon_to_code));
324#else
325	  po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
326		     xasprintf (_("\
327Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
328This version was built without iconv()."),
329				canon_from_code, canon_to_code,
330				basename (program_name)));
331#endif
332    }
333
334  return msgids_changed;
335}
336
337bool
338iconv_message_list (message_list_ty *mlp,
339		    const char *canon_from_code, const char *canon_to_code,
340		    const char *from_filename)
341{
342  return iconv_message_list_internal (mlp,
343				      canon_from_code, canon_to_code, true,
344				      from_filename);
345}
346
347msgdomain_list_ty *
348iconv_msgdomain_list (msgdomain_list_ty *mdlp,
349		      const char *to_code,
350		      bool update_header,
351		      const char *from_filename)
352{
353  const char *canon_to_code;
354  size_t k;
355
356  /* Canonicalize target encoding.  */
357  canon_to_code = po_charset_canonicalize (to_code);
358  if (canon_to_code == NULL)
359    po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
360	       xasprintf (_("\
361target charset \"%s\" is not a portable encoding name."),
362			  to_code));
363
364  for (k = 0; k < mdlp->nitems; k++)
365    iconv_message_list_internal (mdlp->item[k]->messages,
366				 mdlp->encoding, canon_to_code, update_header,
367				 from_filename);
368
369  mdlp->encoding = canon_to_code;
370  return mdlp;
371}
372
373#if HAVE_ICONV
374
/* Test whether the NUL-terminated STRING can be converted through CD.
   Returns true only if xmem_cd_iconv succeeds and its output contains
   exactly one NUL byte, at the very end.  The converted buffer is freed
   before returning.  */
static bool
iconvable_string (iconv_t cd, const char *string)
{
  char *converted = NULL;
  size_t converted_size = 0;
  bool single_trailing_nul;

  if (xmem_cd_iconv (string, strlen (string) + 1, cd,
                     &converted, &converted_size) != 0)
    return false;

  single_trailing_nul = false;
  if (converted_size > 0 && converted[converted_size - 1] == '\0')
    single_trailing_nul = (strlen (converted) == converted_size - 1);

  free (converted);
  return single_trailing_nul;
}
392
393static bool
394iconvable_string_list (iconv_t cd, string_list_ty *slp)
395{
396  size_t i;
397
398  if (slp != NULL)
399    for (i = 0; i < slp->nitems; i++)
400      if (!iconvable_string (cd, slp->item[i]))
401	return false;
402  return true;
403}
404
405static bool
406iconvable_prev_msgid (iconv_t cd, message_ty *mp)
407{
408  if (mp->prev_msgctxt != NULL)
409    if (!iconvable_string (cd, mp->prev_msgctxt))
410      return false;
411  if (mp->prev_msgid != NULL)
412    if (!iconvable_string (cd, mp->prev_msgid))
413      return false;
414  if (mp->msgid_plural != NULL)
415    if (!iconvable_string (cd, mp->prev_msgid_plural))
416      return false;
417  return true;
418}
419
420static bool
421iconvable_msgid (iconv_t cd, message_ty *mp)
422{
423  if (mp->msgctxt != NULL)
424    if (!iconvable_string (cd, mp->msgctxt))
425      return false;
426  if (!iconvable_string (cd, mp->msgid))
427    return false;
428  if (mp->msgid_plural != NULL)
429    if (!iconvable_string (cd, mp->msgid_plural))
430      return false;
431  return true;
432}
433
434static bool
435iconvable_msgstr (iconv_t cd, message_ty *mp)
436{
437  char *result = NULL;
438  size_t resultlen = 0;
439
440  if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
441    abort ();
442
443  if (xmem_cd_iconv (mp->msgstr, mp->msgstr_len, cd, &result, &resultlen) == 0)
444    {
445      bool ok = false;
446
447      /* Test if the result has a NUL byte at the end.  */
448      if (resultlen > 0 && result[resultlen - 1] == '\0')
449	/* Test if the result has the same number of NUL bytes.  */
450	{
451	  const char *p;
452	  const char *pend;
453	  int nulcount1;
454	  int nulcount2;
455
456	  for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
457	       p < pend;
458	       p += strlen (p) + 1, nulcount1++);
459	  for (p = result, pend = p + resultlen, nulcount2 = 0;
460	       p < pend;
461	       p += strlen (p) + 1, nulcount2++);
462
463	  if (nulcount1 == nulcount2)
464	    ok = true;
465	}
466
467      free (result);
468      return ok;
469    }
470  return false;
471}
472
473#endif
474
475bool
476is_message_list_iconvable (message_list_ty *mlp,
477			   const char *canon_from_code,
478			   const char *canon_to_code)
479{
480  bool canon_from_code_overridden = (canon_from_code != NULL);
481  size_t j;
482
483  /* If the list is empty, nothing to check.  */
484  if (mlp->nitems == 0)
485    return true;
486
487  /* Search the header entry, and extract the charset name.  */
488  for (j = 0; j < mlp->nitems; j++)
489    if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
490      {
491	const char *header = mlp->item[j]->msgstr;
492
493	if (header != NULL)
494	  {
495	    const char *charsetstr = c_strstr (header, "charset=");
496
497	    if (charsetstr != NULL)
498	      {
499		size_t len;
500		char *charset;
501		const char *canon_charset;
502
503		charsetstr += strlen ("charset=");
504		len = strcspn (charsetstr, " \t\n");
505		charset = (char *) xmalloca (len + 1);
506		memcpy (charset, charsetstr, len);
507		charset[len] = '\0';
508
509		canon_charset = po_charset_canonicalize (charset);
510		if (canon_charset == NULL)
511		  {
512		    if (!canon_from_code_overridden)
513		      {
514			/* Don't give an error for POT files, because POT
515			   files usually contain only ASCII msgids.  */
516			if (strcmp (charset, "CHARSET") == 0)
517			  canon_charset = po_charset_ascii;
518			else
519			  {
520			    /* charset is not a portable encoding name.  */
521			    freea (charset);
522			    return false;
523			  }
524		      }
525		  }
526		else
527		  {
528		    if (canon_from_code == NULL)
529		      canon_from_code = canon_charset;
530		    else if (canon_from_code != canon_charset)
531		      {
532			/* Two different charsets in input file.  */
533			freea (charset);
534			return false;
535		      }
536		  }
537		freea (charset);
538	      }
539	  }
540      }
541  if (canon_from_code == NULL)
542    {
543      if (is_ascii_message_list (mlp))
544	canon_from_code = po_charset_ascii;
545      else
546	/* Input file lacks a header entry with a charset specification.  */
547	return false;
548    }
549
550  /* If the two encodings are the same, nothing to check.  */
551  if (canon_from_code != canon_to_code)
552    {
553#if HAVE_ICONV
554      iconv_t cd;
555
556      /* Avoid glibc-2.1 bug with EUC-KR.  */
557# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
558      if (strcmp (canon_from_code, "EUC-KR") == 0)
559	cd = (iconv_t)(-1);
560      else
561# endif
562      cd = iconv_open (canon_to_code, canon_from_code);
563      if (cd == (iconv_t)(-1))
564	/* iconv() doesn't support this conversion.  */
565	return false;
566
567      for (j = 0; j < mlp->nitems; j++)
568	{
569	  message_ty *mp = mlp->item[j];
570
571	  if (!(iconvable_string_list (cd, mp->comment)
572		&& iconvable_string_list (cd, mp->comment_dot)
573		&& iconvable_prev_msgid (cd, mp)
574		&& iconvable_msgid (cd, mp)
575		&& iconvable_msgstr (cd, mp)))
576	    return false;
577	}
578
579      iconv_close (cd);
580#else
581      /* This version was built without iconv().  */
582      return false;
583#endif
584    }
585
586  return true;
587}
588