1/* Message list charset and locale charset handling. 2 Copyright (C) 2001-2003, 2005-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2001. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18 19#ifdef HAVE_CONFIG_H 20# include "config.h" 21#endif 22#include <alloca.h> 23 24/* Specification. */ 25#include "msgl-iconv.h" 26 27#include <stdbool.h> 28#include <stdlib.h> 29#include <string.h> 30 31#if HAVE_ICONV 32# include <iconv.h> 33#endif 34 35#include "progname.h" 36#include "basename.h" 37#include "message.h" 38#include "po-charset.h" 39#include "xstriconv.h" 40#include "msgl-ascii.h" 41#include "xalloc.h" 42#include "xmalloca.h" 43#include "c-strstr.h" 44#include "xvasprintf.h" 45#include "po-xerror.h" 46#include "gettext.h" 47 48#define _(str) gettext (str) 49 50 51#if HAVE_ICONV 52 53static void conversion_error (const struct conversion_context* context) 54#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) 55 __attribute__ ((noreturn)) 56#endif 57; 58static void 59conversion_error (const struct conversion_context* context) 60{ 61 if (context->to_code == po_charset_utf8) 62 /* If a conversion to UTF-8 fails, the problem lies in the input. */ 63 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false, 64 xasprintf (_("%s: input is not valid in \"%s\" encoding"), 65 context->from_filename, context->from_code)); 66 else 67 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false, 68 xasprintf (_("\ 69%s: error while converting from \"%s\" encoding to \"%s\" encoding"), 70 context->from_filename, context->from_code, 71 context->to_code)); 72 /* NOTREACHED */ 73 abort (); 74} 75 76char * 77convert_string (iconv_t cd, const char *string, 78 const struct conversion_context* context) 79{ 80 size_t len = strlen (string) + 1; 81 char *result = NULL; 82 size_t resultlen = 0; 83 84 if (xmem_cd_iconv (string, len, cd, &result, &resultlen) == 0) 85 /* Verify the result has exactly one NUL byte, at the end. */ 86 if (resultlen > 0 && result[resultlen - 1] == '\0' 87 && strlen (result) == resultlen - 1) 88 return result; 89 90 conversion_error (context); 91 /* NOTREACHED */ 92 return NULL; 93} 94 95static void 96convert_string_list (iconv_t cd, string_list_ty *slp, 97 const struct conversion_context* context) 98{ 99 size_t i; 100 101 if (slp != NULL) 102 for (i = 0; i < slp->nitems; i++) 103 slp->item[i] = convert_string (cd, slp->item[i], context); 104} 105 106static void 107convert_prev_msgid (iconv_t cd, message_ty *mp, 108 const struct conversion_context* context) 109{ 110 if (mp->prev_msgctxt != NULL) 111 mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context); 112 if (mp->prev_msgid != NULL) 113 mp->prev_msgid = convert_string (cd, mp->prev_msgid, context); 114 if (mp->prev_msgid_plural != NULL) 115 mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context); 116} 117 118static void 119convert_msgid (iconv_t cd, message_ty *mp, 120 const struct conversion_context* context) 121{ 122 if (mp->msgctxt != NULL) 123 mp->msgctxt = convert_string (cd, mp->msgctxt, context); 124 mp->msgid = convert_string (cd, mp->msgid, context); 125 if (mp->msgid_plural != NULL) 126 mp->msgid_plural = convert_string (cd, mp->msgid_plural, context); 127} 128 129static void 130convert_msgstr (iconv_t cd, message_ty *mp, 131 const struct conversion_context* context) 132{ 133 char *result = NULL; 134 size_t resultlen = 0; 135 136 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0')) 137 abort (); 138 139 if (xmem_cd_iconv (mp->msgstr, mp->msgstr_len, cd, &result, &resultlen) == 0) 140 /* Verify the result has a NUL byte at the end. */ 141 if (resultlen > 0 && result[resultlen - 1] == '\0') 142 /* Verify the result has the same number of NUL bytes. */ 143 { 144 const char *p; 145 const char *pend; 146 int nulcount1; 147 int nulcount2; 148 149 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0; 150 p < pend; 151 p += strlen (p) + 1, nulcount1++); 152 for (p = result, pend = p + resultlen, nulcount2 = 0; 153 p < pend; 154 p += strlen (p) + 1, nulcount2++); 155 156 if (nulcount1 == nulcount2) 157 { 158 mp->msgstr = result; 159 mp->msgstr_len = resultlen; 160 return; 161 } 162 } 163 164 conversion_error (context); 165} 166 167#endif 168 169 170static bool 171iconv_message_list_internal (message_list_ty *mlp, 172 const char *canon_from_code, 173 const char *canon_to_code, 174 bool update_header, 175 const char *from_filename) 176{ 177 bool canon_from_code_overridden = (canon_from_code != NULL); 178 bool msgids_changed; 179 size_t j; 180 181 /* If the list is empty, nothing to do. */ 182 if (mlp->nitems == 0) 183 return false; 184 185 /* Search the header entry, and extract and replace the charset name. */ 186 for (j = 0; j < mlp->nitems; j++) 187 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 188 { 189 const char *header = mlp->item[j]->msgstr; 190 191 if (header != NULL) 192 { 193 const char *charsetstr = c_strstr (header, "charset="); 194 195 if (charsetstr != NULL) 196 { 197 size_t len; 198 char *charset; 199 const char *canon_charset; 200 201 charsetstr += strlen ("charset="); 202 len = strcspn (charsetstr, " \t\n"); 203 charset = (char *) xmalloca (len + 1); 204 memcpy (charset, charsetstr, len); 205 charset[len] = '\0'; 206 207 canon_charset = po_charset_canonicalize (charset); 208 if (canon_charset == NULL) 209 { 210 if (!canon_from_code_overridden) 211 { 212 /* Don't give an error for POT files, because POT 213 files usually contain only ASCII msgids. */ 214 const char *filename = from_filename; 215 size_t filenamelen; 216 217 if (filename != NULL 218 && (filenamelen = strlen (filename)) >= 4 219 && memcmp (filename + filenamelen - 4, ".pot", 4) 220 == 0 221 && strcmp (charset, "CHARSET") == 0) 222 canon_charset = po_charset_ascii; 223 else 224 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, 225 false, xasprintf (_("\ 226present charset \"%s\" is not a portable encoding name"), 227 charset)); 228 } 229 } 230 else 231 { 232 if (canon_from_code == NULL) 233 canon_from_code = canon_charset; 234 else if (canon_from_code != canon_charset) 235 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, 236 false, 237 xasprintf (_("\ 238two different charsets \"%s\" and \"%s\" in input file"), 239 canon_from_code, canon_charset)); 240 } 241 freea (charset); 242 243 if (update_header) 244 { 245 size_t len1, len2, len3; 246 char *new_header; 247 248 len1 = charsetstr - header; 249 len2 = strlen (canon_to_code); 250 len3 = (header + strlen (header)) - (charsetstr + len); 251 new_header = XNMALLOC (len1 + len2 + len3 + 1, char); 252 memcpy (new_header, header, len1); 253 memcpy (new_header + len1, canon_to_code, len2); 254 memcpy (new_header + len1 + len2, charsetstr + len, 255 len3 + 1); 256 mlp->item[j]->msgstr = new_header; 257 mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1; 258 } 259 } 260 } 261 } 262 if (canon_from_code == NULL) 263 { 264 if (is_ascii_message_list (mlp)) 265 canon_from_code = po_charset_ascii; 266 else 267 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 268 _("\ 269input file doesn't contain a header entry with a charset specification")); 270 } 271 272 msgids_changed = false; 273 274 /* If the two encodings are the same, nothing to do. */ 275 if (canon_from_code != canon_to_code) 276 { 277#if HAVE_ICONV 278 iconv_t cd; 279 struct conversion_context context; 280 281 /* Avoid glibc-2.1 bug with EUC-KR. */ 282# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION 283 if (strcmp (canon_from_code, "EUC-KR") == 0) 284 cd = (iconv_t)(-1); 285 else 286# endif 287 cd = iconv_open (canon_to_code, canon_from_code); 288 if (cd == (iconv_t)(-1)) 289 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 290 xasprintf (_("\ 291Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \ 292and iconv() does not support this conversion."), 293 canon_from_code, canon_to_code, 294 basename (program_name))); 295 296 context.from_code = canon_from_code; 297 context.to_code = canon_to_code; 298 context.from_filename = from_filename; 299 300 for (j = 0; j < mlp->nitems; j++) 301 { 302 message_ty *mp = mlp->item[j]; 303 304 if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt)) 305 || !is_ascii_string (mp->msgid)) 306 msgids_changed = true; 307 context.message = mp; 308 convert_string_list (cd, mp->comment, &context); 309 convert_string_list (cd, mp->comment_dot, &context); 310 convert_prev_msgid (cd, mp, &context); 311 convert_msgid (cd, mp, &context); 312 convert_msgstr (cd, mp, &context); 313 } 314 315 iconv_close (cd); 316 317 if (msgids_changed) 318 if (message_list_msgids_changed (mlp)) 319 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 320 xasprintf (_("\ 321Conversion from \"%s\" to \"%s\" introduces duplicates: \ 322some different msgids become equal."), 323 canon_from_code, canon_to_code)); 324#else 325 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 326 xasprintf (_("\ 327Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \ 328This version was built without iconv()."), 329 canon_from_code, canon_to_code, 330 basename (program_name))); 331#endif 332 } 333 334 return msgids_changed; 335} 336 337bool 338iconv_message_list (message_list_ty *mlp, 339 const char *canon_from_code, const char *canon_to_code, 340 const char *from_filename) 341{ 342 return iconv_message_list_internal (mlp, 343 canon_from_code, canon_to_code, true, 344 from_filename); 345} 346 347msgdomain_list_ty * 348iconv_msgdomain_list (msgdomain_list_ty *mdlp, 349 const char *to_code, 350 bool update_header, 351 const char *from_filename) 352{ 353 const char *canon_to_code; 354 size_t k; 355 356 /* Canonicalize target encoding. */ 357 canon_to_code = po_charset_canonicalize (to_code); 358 if (canon_to_code == NULL) 359 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false, 360 xasprintf (_("\ 361target charset \"%s\" is not a portable encoding name."), 362 to_code)); 363 364 for (k = 0; k < mdlp->nitems; k++) 365 iconv_message_list_internal (mdlp->item[k]->messages, 366 mdlp->encoding, canon_to_code, update_header, 367 from_filename); 368 369 mdlp->encoding = canon_to_code; 370 return mdlp; 371} 372 373#if HAVE_ICONV 374 375static bool 376iconvable_string (iconv_t cd, const char *string) 377{ 378 size_t len = strlen (string) + 1; 379 char *result = NULL; 380 size_t resultlen = 0; 381 382 if (xmem_cd_iconv (string, len, cd, &result, &resultlen) == 0) 383 { 384 /* Test if the result has exactly one NUL byte, at the end. */ 385 bool ok = (resultlen > 0 && result[resultlen - 1] == '\0' 386 && strlen (result) == resultlen - 1); 387 free (result); 388 return ok; 389 } 390 return false; 391} 392 393static bool 394iconvable_string_list (iconv_t cd, string_list_ty *slp) 395{ 396 size_t i; 397 398 if (slp != NULL) 399 for (i = 0; i < slp->nitems; i++) 400 if (!iconvable_string (cd, slp->item[i])) 401 return false; 402 return true; 403} 404 405static bool 406iconvable_prev_msgid (iconv_t cd, message_ty *mp) 407{ 408 if (mp->prev_msgctxt != NULL) 409 if (!iconvable_string (cd, mp->prev_msgctxt)) 410 return false; 411 if (mp->prev_msgid != NULL) 412 if (!iconvable_string (cd, mp->prev_msgid)) 413 return false; 414 if (mp->msgid_plural != NULL) 415 if (!iconvable_string (cd, mp->prev_msgid_plural)) 416 return false; 417 return true; 418} 419 420static bool 421iconvable_msgid (iconv_t cd, message_ty *mp) 422{ 423 if (mp->msgctxt != NULL) 424 if (!iconvable_string (cd, mp->msgctxt)) 425 return false; 426 if (!iconvable_string (cd, mp->msgid)) 427 return false; 428 if (mp->msgid_plural != NULL) 429 if (!iconvable_string (cd, mp->msgid_plural)) 430 return false; 431 return true; 432} 433 434static bool 435iconvable_msgstr (iconv_t cd, message_ty *mp) 436{ 437 char *result = NULL; 438 size_t resultlen = 0; 439 440 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0')) 441 abort (); 442 443 if (xmem_cd_iconv (mp->msgstr, mp->msgstr_len, cd, &result, &resultlen) == 0) 444 { 445 bool ok = false; 446 447 /* Test if the result has a NUL byte at the end. */ 448 if (resultlen > 0 && result[resultlen - 1] == '\0') 449 /* Test if the result has the same number of NUL bytes. */ 450 { 451 const char *p; 452 const char *pend; 453 int nulcount1; 454 int nulcount2; 455 456 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0; 457 p < pend; 458 p += strlen (p) + 1, nulcount1++); 459 for (p = result, pend = p + resultlen, nulcount2 = 0; 460 p < pend; 461 p += strlen (p) + 1, nulcount2++); 462 463 if (nulcount1 == nulcount2) 464 ok = true; 465 } 466 467 free (result); 468 return ok; 469 } 470 return false; 471} 472 473#endif 474 475bool 476is_message_list_iconvable (message_list_ty *mlp, 477 const char *canon_from_code, 478 const char *canon_to_code) 479{ 480 bool canon_from_code_overridden = (canon_from_code != NULL); 481 size_t j; 482 483 /* If the list is empty, nothing to check. */ 484 if (mlp->nitems == 0) 485 return true; 486 487 /* Search the header entry, and extract the charset name. */ 488 for (j = 0; j < mlp->nitems; j++) 489 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 490 { 491 const char *header = mlp->item[j]->msgstr; 492 493 if (header != NULL) 494 { 495 const char *charsetstr = c_strstr (header, "charset="); 496 497 if (charsetstr != NULL) 498 { 499 size_t len; 500 char *charset; 501 const char *canon_charset; 502 503 charsetstr += strlen ("charset="); 504 len = strcspn (charsetstr, " \t\n"); 505 charset = (char *) xmalloca (len + 1); 506 memcpy (charset, charsetstr, len); 507 charset[len] = '\0'; 508 509 canon_charset = po_charset_canonicalize (charset); 510 if (canon_charset == NULL) 511 { 512 if (!canon_from_code_overridden) 513 { 514 /* Don't give an error for POT files, because POT 515 files usually contain only ASCII msgids. */ 516 if (strcmp (charset, "CHARSET") == 0) 517 canon_charset = po_charset_ascii; 518 else 519 { 520 /* charset is not a portable encoding name. */ 521 freea (charset); 522 return false; 523 } 524 } 525 } 526 else 527 { 528 if (canon_from_code == NULL) 529 canon_from_code = canon_charset; 530 else if (canon_from_code != canon_charset) 531 { 532 /* Two different charsets in input file. */ 533 freea (charset); 534 return false; 535 } 536 } 537 freea (charset); 538 } 539 } 540 } 541 if (canon_from_code == NULL) 542 { 543 if (is_ascii_message_list (mlp)) 544 canon_from_code = po_charset_ascii; 545 else 546 /* Input file lacks a header entry with a charset specification. */ 547 return false; 548 } 549 550 /* If the two encodings are the same, nothing to check. */ 551 if (canon_from_code != canon_to_code) 552 { 553#if HAVE_ICONV 554 iconv_t cd; 555 556 /* Avoid glibc-2.1 bug with EUC-KR. */ 557# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION 558 if (strcmp (canon_from_code, "EUC-KR") == 0) 559 cd = (iconv_t)(-1); 560 else 561# endif 562 cd = iconv_open (canon_to_code, canon_from_code); 563 if (cd == (iconv_t)(-1)) 564 /* iconv() doesn't support this conversion. */ 565 return false; 566 567 for (j = 0; j < mlp->nitems; j++) 568 { 569 message_ty *mp = mlp->item[j]; 570 571 if (!(iconvable_string_list (cd, mp->comment) 572 && iconvable_string_list (cd, mp->comment_dot) 573 && iconvable_prev_msgid (cd, mp) 574 && iconvable_msgid (cd, mp) 575 && iconvable_msgstr (cd, mp))) 576 return false; 577 } 578 579 iconv_close (cd); 580#else 581 /* This version was built without iconv(). */ 582 return false; 583#endif 584 } 585 586 return true; 587} 588