1/* GNU gettext - internationalization aids 2 Copyright (C) 1995-1998, 2000-2007 Free Software Foundation, Inc. 3 This file was written by Peter Miller <millerp@canb.auug.org.au> 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21#include <alloca.h> 22 23#include <getopt.h> 24#include <limits.h> 25#include <stdbool.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29#include <locale.h> 30 31#include "closeout.h" 32#include "dir-list.h" 33#include "error.h" 34#include "error-progname.h" 35#include "progname.h" 36#include "relocatable.h" 37#include "basename.h" 38#include "message.h" 39#include "read-catalog.h" 40#include "read-po.h" 41#include "read-properties.h" 42#include "read-stringtable.h" 43#include "write-catalog.h" 44#include "write-po.h" 45#include "write-properties.h" 46#include "write-stringtable.h" 47#include "format.h" 48#include "xalloc.h" 49#include "xmalloca.h" 50#include "obstack.h" 51#include "c-strstr.h" 52#include "c-strcase.h" 53#include "po-charset.h" 54#include "msgl-iconv.h" 55#include "msgl-equal.h" 56#include "msgl-fsearch.h" 57#include "lock.h" 58#include "plural-exp.h" 59#include "plural-count.h" 60#include "msgl-check.h" 61#include "po-xerror.h" 62#include "backupfile.h" 63#include "copy-file.h" 64#include "propername.h" 65#include "gettext.h" 66 67#define _(str) gettext (str) 68 69#define obstack_chunk_alloc xmalloc 70#define obstack_chunk_free free 71 72 73/* If true do not print unneeded messages. */ 74static bool quiet; 75 76/* Verbosity level. */ 77static int verbosity_level; 78 79/* Force output of PO file even if empty. */ 80static int force_po; 81 82/* Apply the .pot file to each of the domains in the PO file. */ 83static bool multi_domain_mode = false; 84 85/* Determines whether to use fuzzy matching. */ 86static bool use_fuzzy_matching = true; 87 88/* Determines whether to keep old msgids as previous msgids. */ 89static bool keep_previous = false; 90 91/* List of user-specified compendiums. */ 92static message_list_list_ty *compendiums; 93 94/* List of corresponding filenames. */ 95static string_list_ty *compendium_filenames; 96 97/* Update mode. */ 98static bool update_mode = false; 99static const char *version_control_string; 100static const char *backup_suffix_string; 101 102/* Long options. */ 103static const struct option long_options[] = 104{ 105 { "add-location", no_argument, &line_comment, 1 }, 106 { "backup", required_argument, NULL, CHAR_MAX + 1 }, 107 { "compendium", required_argument, NULL, 'C', }, 108 { "directory", required_argument, NULL, 'D' }, 109 { "escape", no_argument, NULL, 'E' }, 110 { "force-po", no_argument, &force_po, 1 }, 111 { "help", no_argument, NULL, 'h' }, 112 { "indent", no_argument, NULL, 'i' }, 113 { "multi-domain", no_argument, NULL, 'm' }, 114 { "no-escape", no_argument, NULL, 'e' }, 115 { "no-fuzzy-matching", no_argument, NULL, 'N' }, 116 { "no-location", no_argument, &line_comment, 0 }, 117 { "no-wrap", no_argument, NULL, CHAR_MAX + 4 }, 118 { "output-file", required_argument, NULL, 'o' }, 119 { "previous", no_argument, NULL, CHAR_MAX + 7 }, 120 { "properties-input", no_argument, NULL, 'P' }, 121 { "properties-output", no_argument, NULL, 'p' }, 122 { "quiet", no_argument, NULL, 'q' }, 123 { "sort-by-file", no_argument, NULL, 'F' }, 124 { "sort-output", no_argument, NULL, 's' }, 125 { "silent", no_argument, NULL, 'q' }, 126 { "strict", no_argument, NULL, CHAR_MAX + 2 }, 127 { "stringtable-input", no_argument, NULL, CHAR_MAX + 5 }, 128 { "stringtable-output", no_argument, NULL, CHAR_MAX + 6 }, 129 { "suffix", required_argument, NULL, CHAR_MAX + 3 }, 130 { "update", no_argument, NULL, 'U' }, 131 { "verbose", no_argument, NULL, 'v' }, 132 { "version", no_argument, NULL, 'V' }, 133 { "width", required_argument, NULL, 'w', }, 134 { NULL, 0, NULL, 0 } 135}; 136 137 138struct statistics 139{ 140 size_t merged; 141 size_t fuzzied; 142 size_t missing; 143 size_t obsolete; 144}; 145 146 147/* Forward declaration of local functions. */ 148static void usage (int status) 149#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) 150 __attribute__ ((noreturn)) 151#endif 152; 153static void compendium (const char *filename); 154static msgdomain_list_ty *merge (const char *fn1, const char *fn2, 155 catalog_input_format_ty input_syntax, 156 msgdomain_list_ty **defp); 157 158 159int 160main (int argc, char **argv) 161{ 162 int opt; 163 bool do_help; 164 bool do_version; 165 char *output_file; 166 msgdomain_list_ty *def; 167 msgdomain_list_ty *result; 168 catalog_input_format_ty input_syntax = &input_format_po; 169 catalog_output_format_ty output_syntax = &output_format_po; 170 bool sort_by_filepos = false; 171 bool sort_by_msgid = false; 172 173 /* Set program name for messages. */ 174 set_program_name (argv[0]); 175 error_print_progname = maybe_print_progname; 176 verbosity_level = 0; 177 quiet = false; 178 gram_max_allowed_errors = UINT_MAX; 179 180#ifdef HAVE_SETLOCALE 181 /* Set locale via LC_ALL. */ 182 setlocale (LC_ALL, ""); 183#endif 184 185 /* Set the text message domain. */ 186 bindtextdomain (PACKAGE, relocate (LOCALEDIR)); 187 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR)); 188 textdomain (PACKAGE); 189 190 /* Ensure that write errors on stdout are detected. */ 191 atexit (close_stdout); 192 193 /* Set default values for variables. */ 194 do_help = false; 195 do_version = false; 196 output_file = NULL; 197 198 while ((opt = getopt_long (argc, argv, "C:D:eEFhimNo:pPqsUvVw:", 199 long_options, NULL)) 200 != EOF) 201 switch (opt) 202 { 203 case '\0': /* Long option. */ 204 break; 205 206 case 'C': 207 compendium (optarg); 208 break; 209 210 case 'D': 211 dir_list_append (optarg); 212 break; 213 214 case 'e': 215 message_print_style_escape (false); 216 break; 217 218 case 'E': 219 message_print_style_escape (true); 220 break; 221 222 case 'F': 223 sort_by_filepos = true; 224 break; 225 226 case 'h': 227 do_help = true; 228 break; 229 230 case 'i': 231 message_print_style_indent (); 232 break; 233 234 case 'm': 235 multi_domain_mode = true; 236 break; 237 238 case 'N': 239 use_fuzzy_matching = false; 240 break; 241 242 case 'o': 243 output_file = optarg; 244 break; 245 246 case 'p': 247 output_syntax = &output_format_properties; 248 break; 249 250 case 'P': 251 input_syntax = &input_format_properties; 252 break; 253 254 case 'q': 255 quiet = true; 256 break; 257 258 case 's': 259 sort_by_msgid = true; 260 break; 261 262 case 'U': 263 update_mode = true; 264 break; 265 266 case 'v': 267 ++verbosity_level; 268 break; 269 270 case 'V': 271 do_version = true; 272 break; 273 274 case 'w': 275 { 276 int value; 277 char *endp; 278 value = strtol (optarg, &endp, 10); 279 if (endp != optarg) 280 message_page_width_set (value); 281 } 282 break; 283 284 case CHAR_MAX + 1: /* --backup */ 285 version_control_string = optarg; 286 break; 287 288 case CHAR_MAX + 2: /* --strict */ 289 message_print_style_uniforum (); 290 break; 291 292 case CHAR_MAX + 3: /* --suffix */ 293 backup_suffix_string = optarg; 294 break; 295 296 case CHAR_MAX + 4: /* --no-wrap */ 297 message_page_width_ignore (); 298 break; 299 300 case CHAR_MAX + 5: /* --stringtable-input */ 301 input_syntax = &input_format_stringtable; 302 break; 303 304 case CHAR_MAX + 6: /* --stringtable-output */ 305 output_syntax = &output_format_stringtable; 306 break; 307 308 case CHAR_MAX + 7: /* --previous */ 309 keep_previous = true; 310 break; 311 312 default: 313 usage (EXIT_FAILURE); 314 break; 315 } 316 317 /* Version information is requested. */ 318 if (do_version) 319 { 320 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); 321 /* xgettext: no-wrap */ 322 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ 323License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\ 324This is free software: you are free to change and redistribute it.\n\ 325There is NO WARRANTY, to the extent permitted by law.\n\ 326"), 327 "1995-1998, 2000-2007"); 328 printf (_("Written by %s.\n"), proper_name ("Peter Miller")); 329 exit (EXIT_SUCCESS); 330 } 331 332 /* Help is requested. */ 333 if (do_help) 334 usage (EXIT_SUCCESS); 335 336 /* Test whether we have an .po file name as argument. */ 337 if (optind >= argc) 338 { 339 error (EXIT_SUCCESS, 0, _("no input files given")); 340 usage (EXIT_FAILURE); 341 } 342 if (optind + 2 != argc) 343 { 344 error (EXIT_SUCCESS, 0, _("exactly 2 input files required")); 345 usage (EXIT_FAILURE); 346 } 347 348 /* Verify selected options. */ 349 if (update_mode) 350 { 351 if (output_file != NULL) 352 { 353 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), 354 "--update", "--output-file"); 355 } 356 } 357 else 358 { 359 if (version_control_string != NULL) 360 { 361 error (EXIT_SUCCESS, 0, _("%s is only valid with %s"), 362 "--backup", "--update"); 363 usage (EXIT_FAILURE); 364 } 365 if (backup_suffix_string != NULL) 366 { 367 error (EXIT_SUCCESS, 0, _("%s is only valid with %s"), 368 "--suffix", "--update"); 369 usage (EXIT_FAILURE); 370 } 371 } 372 373 if (!line_comment && sort_by_filepos) 374 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), 375 "--no-location", "--sort-by-file"); 376 377 if (sort_by_msgid && sort_by_filepos) 378 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), 379 "--sort-output", "--sort-by-file"); 380 381 /* In update mode, --properties-input implies --properties-output. */ 382 if (update_mode && input_syntax == &input_format_properties) 383 output_syntax = &output_format_properties; 384 /* In update mode, --stringtable-input implies --stringtable-output. */ 385 if (update_mode && input_syntax == &input_format_stringtable) 386 output_syntax = &output_format_stringtable; 387 388 /* Merge the two files. */ 389 result = merge (argv[optind], argv[optind + 1], input_syntax, &def); 390 391 /* Sort the results. */ 392 if (sort_by_filepos) 393 msgdomain_list_sort_by_filepos (result); 394 else if (sort_by_msgid) 395 msgdomain_list_sort_by_msgid (result); 396 397 if (update_mode) 398 { 399 /* Do nothing if the original file and the result are equal. Also do 400 nothing if the original file and the result differ only by the 401 POT-Creation-Date in the header entry; this is needed for projects 402 which don't put the .pot file under CVS. */ 403 if (!msgdomain_list_equal (def, result, true)) 404 { 405 /* Back up def.po. */ 406 enum backup_type backup_type; 407 char *backup_file; 408 409 output_file = argv[optind]; 410 411 if (backup_suffix_string == NULL) 412 { 413 backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX"); 414 if (backup_suffix_string != NULL 415 && backup_suffix_string[0] == '\0') 416 backup_suffix_string = NULL; 417 } 418 if (backup_suffix_string != NULL) 419 simple_backup_suffix = backup_suffix_string; 420 421 backup_type = xget_version (_("backup type"), version_control_string); 422 if (backup_type != none) 423 { 424 backup_file = find_backup_file_name (output_file, backup_type); 425 copy_file_preserving (output_file, backup_file); 426 } 427 428 /* Write the merged message list out. */ 429 msgdomain_list_print (result, output_file, output_syntax, true, 430 false); 431 } 432 } 433 else 434 { 435 /* Write the merged message list out. */ 436 msgdomain_list_print (result, output_file, output_syntax, force_po, 437 false); 438 } 439 440 exit (EXIT_SUCCESS); 441} 442 443 444/* Display usage information and exit. */ 445static void 446usage (int status) 447{ 448 if (status != EXIT_SUCCESS) 449 fprintf (stderr, _("Try `%s --help' for more information.\n"), 450 program_name); 451 else 452 { 453 printf (_("\ 454Usage: %s [OPTION] def.po ref.pot\n\ 455"), program_name); 456 printf ("\n"); 457 /* xgettext: no-wrap */ 458 printf (_("\ 459Merges two Uniforum style .po files together. The def.po file is an\n\ 460existing PO file with translations which will be taken over to the newly\n\ 461created file as long as they still match; comments will be preserved,\n\ 462but extracted comments and file positions will be discarded. The ref.pot\n\ 463file is the last created PO file with up-to-date source references but\n\ 464old translations, or a PO Template file (generally created by xgettext);\n\ 465any translations or comments in the file will be discarded, however dot\n\ 466comments and file positions will be preserved. Where an exact match\n\ 467cannot be found, fuzzy matching is used to produce better results.\n\ 468")); 469 printf ("\n"); 470 printf (_("\ 471Mandatory arguments to long options are mandatory for short options too.\n")); 472 printf ("\n"); 473 printf (_("\ 474Input file location:\n")); 475 printf (_("\ 476 def.po translations referring to old sources\n")); 477 printf (_("\ 478 ref.pot references to new sources\n")); 479 printf (_("\ 480 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n")); 481 printf (_("\ 482 -C, --compendium=FILE additional library of message translations,\n\ 483 may be specified more than once\n")); 484 printf ("\n"); 485 printf (_("\ 486Operation mode:\n")); 487 printf (_("\ 488 -U, --update update def.po,\n\ 489 do nothing if def.po already up to date\n")); 490 printf ("\n"); 491 printf (_("\ 492Output file location:\n")); 493 printf (_("\ 494 -o, --output-file=FILE write output to specified file\n")); 495 printf (_("\ 496The results are written to standard output if no output file is specified\n\ 497or if it is -.\n")); 498 printf ("\n"); 499 printf (_("\ 500Output file location in update mode:\n")); 501 printf (_("\ 502The result is written back to def.po.\n")); 503 printf (_("\ 504 --backup=CONTROL make a backup of def.po\n")); 505 printf (_("\ 506 --suffix=SUFFIX override the usual backup suffix\n")); 507 printf (_("\ 508The version control method may be selected via the --backup option or through\n\ 509the VERSION_CONTROL environment variable. Here are the values:\n\ 510 none, off never make backups (even if --backup is given)\n\ 511 numbered, t make numbered backups\n\ 512 existing, nil numbered if numbered backups exist, simple otherwise\n\ 513 simple, never always make simple backups\n")); 514 printf (_("\ 515The backup suffix is `~', unless set with --suffix or the SIMPLE_BACKUP_SUFFIX\n\ 516environment variable.\n\ 517")); 518 printf ("\n"); 519 printf (_("\ 520Operation modifiers:\n")); 521 printf (_("\ 522 -m, --multi-domain apply ref.pot to each of the domains in def.po\n")); 523 printf (_("\ 524 -N, --no-fuzzy-matching do not use fuzzy matching\n")); 525 printf (_("\ 526 --previous keep previous msgids of translated messages\n")); 527 printf ("\n"); 528 printf (_("\ 529Input file syntax:\n")); 530 printf (_("\ 531 -P, --properties-input input files are in Java .properties syntax\n")); 532 printf (_("\ 533 --stringtable-input input files are in NeXTstep/GNUstep .strings\n\ 534 syntax\n")); 535 printf ("\n"); 536 printf (_("\ 537Output details:\n")); 538 printf (_("\ 539 -e, --no-escape do not use C escapes in output (default)\n")); 540 printf (_("\ 541 -E, --escape use C escapes in output, no extended chars\n")); 542 printf (_("\ 543 --force-po write PO file even if empty\n")); 544 printf (_("\ 545 -i, --indent indented output style\n")); 546 printf (_("\ 547 --no-location suppress '#: filename:line' lines\n")); 548 printf (_("\ 549 --add-location preserve '#: filename:line' lines (default)\n")); 550 printf (_("\ 551 --strict strict Uniforum output style\n")); 552 printf (_("\ 553 -p, --properties-output write out a Java .properties file\n")); 554 printf (_("\ 555 --stringtable-output write out a NeXTstep/GNUstep .strings file\n")); 556 printf (_("\ 557 -w, --width=NUMBER set output page width\n")); 558 printf (_("\ 559 --no-wrap do not break long message lines, longer than\n\ 560 the output page width, into several lines\n")); 561 printf (_("\ 562 -s, --sort-output generate sorted output\n")); 563 printf (_("\ 564 -F, --sort-by-file sort output by file location\n")); 565 printf ("\n"); 566 printf (_("\ 567Informative output:\n")); 568 printf (_("\ 569 -h, --help display this help and exit\n")); 570 printf (_("\ 571 -V, --version output version information and exit\n")); 572 printf (_("\ 573 -v, --verbose increase verbosity level\n")); 574 printf (_("\ 575 -q, --quiet, --silent suppress progress indicators\n")); 576 printf ("\n"); 577 /* TRANSLATORS: The placeholder indicates the bug-reporting address 578 for this package. Please add _another line_ saying 579 "Report translation bugs to <...>\n" with the address for translation 580 bugs (typically your translation team's web or email address). */ 581 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), 582 stdout); 583 } 584 585 exit (status); 586} 587 588 589static void 590compendium (const char *filename) 591{ 592 msgdomain_list_ty *mdlp; 593 size_t k; 594 595 mdlp = read_catalog_file (filename, &input_format_po); 596 if (compendiums == NULL) 597 { 598 compendiums = message_list_list_alloc (); 599 compendium_filenames = string_list_alloc (); 600 } 601 for (k = 0; k < mdlp->nitems; k++) 602 { 603 message_list_list_append (compendiums, mdlp->item[k]->messages); 604 string_list_append (compendium_filenames, filename); 605 } 606} 607 608 609/* Data structure representing the messages with known translations. 610 They are composed of 611 - A message list from def.po, 612 - The compendiums. 613 The data structure is optimized for exact and fuzzy searches. */ 614typedef struct definitions_ty definitions_ty; 615struct definitions_ty 616{ 617 /* A list of message lists. The first comes from def.po, the other ones 618 from the compendiums. Each message list has a built-in hash table, 619 for speed when doing the exact searches. */ 620 message_list_list_ty *lists; 621 /* A fuzzy index of the compendiums, for speed when doing fuzzy searches. 622 Used only if use_fuzzy_matching is true and compendiums != NULL. */ 623 message_fuzzy_index_ty *findex; 624 /* A once-only execution guard for the initialization of the fuzzy index. 625 Needed for OpenMP. */ 626 gl_lock_define(, findex_init_lock) 627 /* The canonical encoding of the compendiums. */ 628 const char *canon_charset; 629}; 630 631static inline void 632definitions_init (definitions_ty *definitions, const char *canon_charset) 633{ 634 definitions->lists = message_list_list_alloc (); 635 message_list_list_append (definitions->lists, NULL); 636 if (compendiums != NULL) 637 message_list_list_append_list (definitions->lists, compendiums); 638 definitions->findex = NULL; 639 gl_lock_init (definitions->findex_init_lock); 640 definitions->canon_charset = canon_charset; 641} 642 643/* Create the fuzzy index. 644 Used only if use_fuzzy_matching is true and compendiums != NULL. */ 645static inline void 646definitions_init_findex (definitions_ty *definitions) 647{ 648 /* Protect against concurrent execution. */ 649 gl_lock_lock (definitions->findex_init_lock); 650 if (definitions->findex == NULL) 651 { 652 /* Combine all the compendium message lists into a single one. Don't 653 bother checking for duplicates. */ 654 message_list_ty *all_compendium; 655 size_t i; 656 657 all_compendium = message_list_alloc (false); 658 for (i = 0; i < compendiums->nitems; i++) 659 { 660 message_list_ty *mlp = compendiums->item[i]; 661 size_t j; 662 663 for (j = 0; j < mlp->nitems; j++) 664 message_list_append (all_compendium, mlp->item[j]); 665 } 666 667 /* Create the fuzzy index from it. */ 668 definitions->findex = 669 message_fuzzy_index_alloc (all_compendium, definitions->canon_charset); 670 } 671 gl_lock_unlock (definitions->findex_init_lock); 672} 673 674/* Return the current list of non-compendium messages. */ 675static inline message_list_ty * 676definitions_current_list (const definitions_ty *definitions) 677{ 678 return definitions->lists->item[0]; 679} 680 681/* Set the current list of non-compendium messages. */ 682static inline void 683definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp) 684{ 685 definitions->lists->item[0] = mlp; 686} 687 688/* Exact search. */ 689static inline message_ty * 690definitions_search (const definitions_ty *definitions, 691 const char *msgctxt, const char *msgid) 692{ 693 return message_list_list_search (definitions->lists, msgctxt, msgid); 694} 695 696/* Fuzzy search. 697 Used only if use_fuzzy_matching is true. */ 698static inline message_ty * 699definitions_search_fuzzy (definitions_ty *definitions, 700 const char *msgctxt, const char *msgid) 701{ 702 message_ty *mp1 = 703 message_list_search_fuzzy (definitions_current_list (definitions), 704 msgctxt, msgid); 705 if (compendiums != NULL) 706 { 707 message_ty *mp2; 708 709 /* Create the fuzzy index lazily. */ 710 if (definitions->findex == NULL) 711 definitions_init_findex (definitions); 712 713 mp2 = message_fuzzy_index_search (definitions->findex, msgctxt, msgid); 714 715 /* Choose the best among mp1, mp2. */ 716 if (mp1 == NULL 717 || (mp2 != NULL 718 && (fuzzy_search_goal_function (mp2, msgctxt, msgid) 719 > fuzzy_search_goal_function (mp1, msgctxt, msgid)))) 720 mp1 = mp2; 721 } 722 723 return mp1; 724} 725 726static inline void 727definitions_destroy (definitions_ty *definitions) 728{ 729 message_list_list_free (definitions->lists, 2); 730 if (definitions->findex != NULL) 731 message_fuzzy_index_free (definitions->findex); 732} 733 734 735/* A silent error logger. We are only interested in knowing whether errors 736 occurred at all. */ 737static void 738silent_error_logger (const char *format, ...) 739 __attribute__ ((__format__ (__printf__, 1, 2))); 740static void 741silent_error_logger (const char *format, ...) 742{ 743} 744 745 746/* Another silent error logger. */ 747static void 748silent_xerror (int severity, 749 const struct message_ty *message, 750 const char *filename, size_t lineno, size_t column, 751 int multiline_p, const char *message_text) 752{ 753} 754 755 756static message_ty * 757message_merge (message_ty *def, message_ty *ref, bool force_fuzzy, 758 const unsigned char *plural_distribution, 759 unsigned long plural_distribution_length) 760{ 761 const char *msgstr; 762 size_t msgstr_len; 763 const char *prev_msgctxt; 764 const char *prev_msgid; 765 const char *prev_msgid_plural; 766 message_ty *result; 767 size_t j, i; 768 769 /* Take the msgid from the reference. When fuzzy matches are made, 770 the definition will not be unique, but the reference will be - 771 usually because it has only been slightly changed. */ 772 773 /* Take the msgstr from the definition. The msgstr of the reference 774 is usually empty, as it was generated by xgettext. If we currently 775 process the header entry we have to merge the msgstr by using the 776 Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference. */ 777 if (is_header (ref)) 778 { 779 /* Oh, oh. The header entry and we have something to fill in. */ 780 static const struct 781 { 782 const char *name; 783 size_t len; 784 } known_fields[] = 785 { 786 { "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 }, 787#define PROJECT_ID 0 788 { "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 }, 789#define REPORT_MSGID_BUGS_TO 1 790 { "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 }, 791#define POT_CREATION_DATE 2 792 { "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 }, 793#define PO_REVISION_DATE 3 794 { "Last-Translator:", sizeof ("Last-Translator:") - 1 }, 795#define LAST_TRANSLATOR 4 796 { "Language-Team:", sizeof ("Language-Team:") - 1 }, 797#define LANGUAGE_TEAM 5 798 { "MIME-Version:", sizeof ("MIME-Version:") - 1 }, 799#define MIME_VERSION 6 800 { "Content-Type:", sizeof ("Content-Type:") - 1 }, 801#define CONTENT_TYPE 7 802 { "Content-Transfer-Encoding:", 803 sizeof ("Content-Transfer-Encoding:") - 1 } 804#define CONTENT_TRANSFER 8 805 }; 806#define UNKNOWN 9 807 struct 808 { 809 const char *string; 810 size_t len; 811 } header_fields[UNKNOWN + 1]; 812 struct obstack pool; 813 const char *cp; 814 char *newp; 815 size_t len, cnt; 816 817 /* Clear all fields. */ 818 memset (header_fields, '\0', sizeof (header_fields)); 819 820 /* Prepare a temporary memory pool. */ 821 obstack_init (&pool); 822 823 cp = def->msgstr; 824 while (*cp != '\0') 825 { 826 const char *endp = strchr (cp, '\n'); 827 int terminated = endp != NULL; 828 829 if (!terminated) 830 { 831 /* Add a trailing newline. */ 832 char *copy; 833 endp = strchr (cp, '\0'); 834 835 len = endp - cp + 1; 836 837 copy = (char *) obstack_alloc (&pool, len + 1); 838 stpcpy (stpcpy (copy, cp), "\n"); 839 cp = copy; 840 } 841 else 842 { 843 len = (endp - cp) + 1; 844 ++endp; 845 } 846 847 /* Compare with any of the known fields. */ 848 for (cnt = 0; 849 cnt < sizeof (known_fields) / sizeof (known_fields[0]); 850 ++cnt) 851 if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len) 852 == 0) 853 break; 854 855 if (cnt < sizeof (known_fields) / sizeof (known_fields[0])) 856 { 857 header_fields[cnt].string = &cp[known_fields[cnt].len]; 858 header_fields[cnt].len = len - known_fields[cnt].len; 859 } 860 else 861 { 862 /* It's an unknown field. Append content to what is already 863 known. */ 864 char *extended = 865 (char *) obstack_alloc (&pool, 866 header_fields[UNKNOWN].len + len + 1); 867 memcpy (extended, header_fields[UNKNOWN].string, 868 header_fields[UNKNOWN].len); 869 memcpy (&extended[header_fields[UNKNOWN].len], cp, len); 870 extended[header_fields[UNKNOWN].len + len] = '\0'; 871 header_fields[UNKNOWN].string = extended; 872 header_fields[UNKNOWN].len += len; 873 } 874 875 cp = endp; 876 } 877 878 { 879 const char *msgid_bugs_ptr; 880 881 msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:"); 882 if (msgid_bugs_ptr != NULL) 883 { 884 size_t msgid_bugs_len; 885 const char *endp; 886 887 msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1; 888 889 endp = strchr (msgid_bugs_ptr, '\n'); 890 if (endp == NULL) 891 { 892 /* Add a trailing newline. */ 893 char *extended; 894 endp = strchr (msgid_bugs_ptr, '\0'); 895 msgid_bugs_len = (endp - msgid_bugs_ptr) + 1; 896 extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1); 897 stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n"); 898 msgid_bugs_ptr = extended; 899 } 900 else 901 msgid_bugs_len = (endp - msgid_bugs_ptr) + 1; 902 903 header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr; 904 header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len; 905 } 906 } 907 908 { 909 const char *pot_date_ptr; 910 911 pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:"); 912 if (pot_date_ptr != NULL) 913 { 914 size_t pot_date_len; 915 const char *endp; 916 917 pot_date_ptr += sizeof ("POT-Creation-Date:") - 1; 918 919 endp = strchr (pot_date_ptr, '\n'); 920 if (endp == NULL) 921 { 922 /* Add a trailing newline. */ 923 char *extended; 924 endp = strchr (pot_date_ptr, '\0'); 925 pot_date_len = (endp - pot_date_ptr) + 1; 926 extended = (char *) obstack_alloc (&pool, pot_date_len + 1); 927 stpcpy (stpcpy (extended, pot_date_ptr), "\n"); 928 pot_date_ptr = extended; 929 } 930 else 931 pot_date_len = (endp - pot_date_ptr) + 1; 932 933 header_fields[POT_CREATION_DATE].string = pot_date_ptr; 934 header_fields[POT_CREATION_DATE].len = pot_date_len; 935 } 936 } 937 938 /* Concatenate all the various fields. */ 939 len = 0; 940 for (cnt = 0; cnt < UNKNOWN; ++cnt) 941 if (header_fields[cnt].string != NULL) 942 len += known_fields[cnt].len + header_fields[cnt].len; 943 len += header_fields[UNKNOWN].len; 944 945 cp = newp = XNMALLOC (len + 1, char); 946 newp[len] = '\0'; 947 948#define IF_FILLED(idx) \ 949 if (header_fields[idx].string) \ 950 newp = stpncpy (stpcpy (newp, known_fields[idx].name), \ 951 header_fields[idx].string, header_fields[idx].len) 952 953 IF_FILLED (PROJECT_ID); 954 IF_FILLED (REPORT_MSGID_BUGS_TO); 955 IF_FILLED (POT_CREATION_DATE); 956 IF_FILLED (PO_REVISION_DATE); 957 IF_FILLED (LAST_TRANSLATOR); 958 IF_FILLED (LANGUAGE_TEAM); 959 IF_FILLED (MIME_VERSION); 960 IF_FILLED (CONTENT_TYPE); 961 IF_FILLED (CONTENT_TRANSFER); 962 if (header_fields[UNKNOWN].string != NULL) 963 stpcpy (newp, header_fields[UNKNOWN].string); 964 965#undef IF_FILLED 966 967 /* Free the temporary memory pool. */ 968 obstack_free (&pool, NULL); 969 970 msgstr = cp; 971 msgstr_len = strlen (cp) + 1; 972 973 prev_msgctxt = NULL; 974 prev_msgid = NULL; 975 prev_msgid_plural = NULL; 976 } 977 else 978 { 979 msgstr = def->msgstr; 980 msgstr_len = def->msgstr_len; 981 982 if (def->is_fuzzy) 983 { 984 prev_msgctxt = def->prev_msgctxt; 985 prev_msgid = def->prev_msgid; 986 prev_msgid_plural = def->prev_msgid_plural; 987 } 988 else 989 { 990 prev_msgctxt = def->msgctxt; 991 prev_msgid = def->msgid; 992 prev_msgid_plural = def->msgid_plural; 993 } 994 } 995 996 result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL, 997 xstrdup (ref->msgid), ref->msgid_plural, 998 msgstr, msgstr_len, &def->pos); 999 1000 /* Take the comments from the definition file. There will be none at 1001 all in the reference file, as it was generated by xgettext. */ 1002 if (def->comment) 1003 for (j = 0; j < def->comment->nitems; ++j) 1004 message_comment_append (result, def->comment->item[j]); 1005 1006 /* Take the dot comments from the reference file, as they are 1007 generated by xgettext. Any in the definition file are old ones 1008 collected by previous runs of xgettext and msgmerge. */ 1009 if (ref->comment_dot) 1010 for (j = 0; j < ref->comment_dot->nitems; ++j) 1011 message_comment_dot_append (result, ref->comment_dot->item[j]); 1012 1013 /* The flags are mixed in a special way. Some informations come 1014 from the reference message (such as format/no-format), others 1015 come from the definition file (fuzzy or not). */ 1016 result->is_fuzzy = def->is_fuzzy | force_fuzzy; 1017 1018 /* If ref and def have the same msgid but different msgid_plural, it's 1019 a reason to mark the result fuzzy. */ 1020 if (!result->is_fuzzy 1021 && (ref->msgid_plural != NULL 1022 ? def->msgid_plural == NULL 1023 || strcmp (ref->msgid_plural, def->msgid_plural) != 0 1024 : def->msgid_plural != NULL)) 1025 result->is_fuzzy = true; 1026 1027 for (i = 0; i < NFORMATS; i++) 1028 { 1029 result->is_format[i] = ref->is_format[i]; 1030 1031 /* If the reference message is marked as being a format specifier, 1032 but the definition message is not, we check if the resulting 1033 message would pass "msgfmt -c". If yes, then all is fine. If 1034 not, we add a fuzzy marker, because 1035 1. the message needs the translator's attention, 1036 2. msgmerge must not transform a PO file which passes "msgfmt -c" 1037 into a PO file which doesn't. */ 1038 if (!result->is_fuzzy 1039 && possible_format_p (ref->is_format[i]) 1040 && !possible_format_p (def->is_format[i]) 1041 && check_msgid_msgstr_format_i (ref->msgid, ref->msgid_plural, 1042 msgstr, msgstr_len, i, 1043 plural_distribution, 1044 plural_distribution_length, 1045 silent_error_logger) > 0) 1046 result->is_fuzzy = true; 1047 } 1048 1049 result->do_wrap = ref->do_wrap; 1050 1051 /* Insert previous msgid, commented out with "#|". 1052 Do so only when --previous is specified, for backward compatibility. 1053 Since the "previous msgid" represents the original msgid that led to 1054 the current msgstr, 1055 - we can omit it if the resulting message is not fuzzy, 1056 - otherwise, if the corresponding message from the definition file 1057 was translated (not fuzzy), we use that message's msgid, 1058 - otherwise, we use that message's prev_msgid. */ 1059 if (keep_previous && result->is_fuzzy) 1060 { 1061 result->prev_msgctxt = prev_msgctxt; 1062 result->prev_msgid = prev_msgid; 1063 result->prev_msgid_plural = prev_msgid_plural; 1064 } 1065 1066 /* If the reference message was obsolete, make the resulting message 1067 obsolete. This case doesn't occur for POT files, but users sometimes 1068 use PO files that are themselves the result of msgmerge instead of POT 1069 files. */ 1070 result->obsolete = ref->obsolete; 1071 1072 /* Take the file position comments from the reference file, as they 1073 are generated by xgettext. Any in the definition file are old ones 1074 collected by previous runs of xgettext and msgmerge. */ 1075 for (j = 0; j < ref->filepos_count; ++j) 1076 { 1077 lex_pos_ty *pp = &ref->filepos[j]; 1078 message_comment_filepos (result, pp->file_name, pp->line_number); 1079 } 1080 1081 /* Special postprocessing is needed if the reference message is a 1082 plural form and the definition message isn't, or vice versa. */ 1083 if (ref->msgid_plural != NULL) 1084 { 1085 if (def->msgid_plural == NULL) 1086 result->used = 1; 1087 } 1088 else 1089 { 1090 if (def->msgid_plural != NULL) 1091 result->used = 2; 1092 } 1093 1094 /* All done, return the merged message to the caller. */ 1095 return result; 1096} 1097 1098 1099#define DOT_FREQUENCY 10 1100 1101static void 1102match_domain (const char *fn1, const char *fn2, 1103 definitions_ty *definitions, message_list_ty *refmlp, 1104 message_list_ty *resultmlp, 1105 struct statistics *stats, unsigned int *processed) 1106{ 1107 message_ty *header_entry; 1108 unsigned long int nplurals; 1109 const struct expression *plural_expr; 1110 char *untranslated_plural_msgstr; 1111 unsigned char *plural_distribution; 1112 unsigned long plural_distribution_length; 1113 struct search_result { message_ty *found; bool fuzzy; } *search_results; 1114 size_t j; 1115 1116 header_entry = 1117 message_list_search (definitions_current_list (definitions), NULL, ""); 1118 extract_plural_expression (header_entry ? header_entry->msgstr : NULL, 1119 &plural_expr, &nplurals); 1120 untranslated_plural_msgstr = XNMALLOC (nplurals, char); 1121 memset (untranslated_plural_msgstr, '\0', nplurals); 1122 1123 /* Determine the plural distribution of the plural_expr formula. */ 1124 { 1125 /* Disable error output temporarily. */ 1126 void (*old_po_xerror) (int, const struct message_ty *, const char *, size_t, 1127 size_t, int, const char *) 1128 = po_xerror; 1129 po_xerror = silent_xerror; 1130 1131 if (check_plural_eval (plural_expr, nplurals, header_entry, 1132 &plural_distribution, 1133 &plural_distribution_length) > 0) 1134 { 1135 plural_distribution = NULL; 1136 plural_distribution_length = 0; 1137 } 1138 1139 po_xerror = old_po_xerror; 1140 } 1141 1142 /* Most of the time is spent in definitions_search_fuzzy. 1143 Perform it in a separate loop that can be parallelized by an OpenMP 1144 capable compiler. */ 1145 search_results = XNMALLOC (refmlp->nitems, struct search_result); 1146 { 1147 long int nn = refmlp->nitems; 1148 long int jj; 1149 1150 /* Tell the OpenMP capable compiler to distribute this loop across 1151 several threads. The schedule is dynamic, because for some messages 1152 the loop body can be executed very quickly, whereas for others it takes 1153 a long time. */ 1154 #ifdef _OPENMP 1155 # pragma omp parallel for schedule(dynamic) 1156 #endif 1157 for (jj = 0; jj < nn; jj++) 1158 { 1159 message_ty *refmsg = refmlp->item[jj]; 1160 message_ty *defmsg; 1161 1162 /* Because merging can take a while we print something to signal 1163 we are not dead. */ 1164 if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0) 1165 fputc ('.', stderr); 1166 #ifdef _OPENMP 1167 # pragma omp atomic 1168 #endif 1169 (*processed)++; 1170 1171 /* See if it is in the other file. */ 1172 defmsg = 1173 definitions_search (definitions, refmsg->msgctxt, refmsg->msgid); 1174 if (defmsg != NULL) 1175 { 1176 search_results[jj].found = defmsg; 1177 search_results[jj].fuzzy = false; 1178 } 1179 else if (!is_header (refmsg) 1180 /* If the message was not defined at all, try to find a very 1181 similar message, it could be a typo, or the suggestion may 1182 help. */ 1183 && use_fuzzy_matching 1184 && ((defmsg = 1185 definitions_search_fuzzy (definitions, 1186 refmsg->msgctxt, 1187 refmsg->msgid)) != NULL)) 1188 { 1189 search_results[jj].found = defmsg; 1190 search_results[jj].fuzzy = true; 1191 } 1192 else 1193 search_results[jj].found = NULL; 1194 } 1195 } 1196 1197 for (j = 0; j < refmlp->nitems; j++) 1198 { 1199 message_ty *refmsg = refmlp->item[j]; 1200 1201 /* See if it is in the other file. 1202 This used definitions_search. */ 1203 if (search_results[j].found != NULL && !search_results[j].fuzzy) 1204 { 1205 message_ty *defmsg = search_results[j].found; 1206 /* Merge the reference with the definition: take the #. and 1207 #: comments from the reference, take the # comments from 1208 the definition, take the msgstr from the definition. Add 1209 this merged entry to the output message list. */ 1210 message_ty *mp = 1211 message_merge (defmsg, refmsg, false, 1212 plural_distribution, plural_distribution_length); 1213 1214 message_list_append (resultmlp, mp); 1215 1216 /* Remember that this message has been used, when we scan 1217 later to see if anything was omitted. */ 1218 defmsg->used = 1; 1219 stats->merged++; 1220 } 1221 else if (!is_header (refmsg)) 1222 { 1223 /* If the message was not defined at all, try to find a very 1224 similar message, it could be a typo, or the suggestion may 1225 help. This search assumed use_fuzzy_matching and used 1226 definitions_search_fuzzy. */ 1227 if (search_results[j].found != NULL && search_results[j].fuzzy) 1228 { 1229 message_ty *defmsg = search_results[j].found; 1230 message_ty *mp; 1231 1232 if (verbosity_level > 1) 1233 { 1234 po_gram_error_at_line (&refmsg->pos, _("\ 1235this message is used but not defined...")); 1236 error_message_count--; 1237 po_gram_error_at_line (&defmsg->pos, _("\ 1238...but this definition is similar")); 1239 } 1240 1241 /* Merge the reference with the definition: take the #. and 1242 #: comments from the reference, take the # comments from 1243 the definition, take the msgstr from the definition. Add 1244 this merged entry to the output message list. */ 1245 mp = message_merge (defmsg, refmsg, true, 1246 plural_distribution, 1247 plural_distribution_length); 1248 1249 message_list_append (resultmlp, mp); 1250 1251 /* Remember that this message has been used, when we scan 1252 later to see if anything was omitted. */ 1253 defmsg->used = 1; 1254 stats->fuzzied++; 1255 if (!quiet && verbosity_level <= 1) 1256 /* Always print a dot if we handled a fuzzy match. */ 1257 fputc ('.', stderr); 1258 } 1259 else 1260 { 1261 message_ty *mp; 1262 bool is_untranslated; 1263 const char *p; 1264 const char *pend; 1265 1266 if (verbosity_level > 1) 1267 po_gram_error_at_line (&refmsg->pos, _("\ 1268this message is used but not defined in %s"), fn1); 1269 1270 mp = message_copy (refmsg); 1271 1272 if (mp->msgid_plural != NULL) 1273 { 1274 /* Test if mp is untranslated. (It most likely is.) */ 1275 is_untranslated = true; 1276 for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++) 1277 if (*p != '\0') 1278 { 1279 is_untranslated = false; 1280 break; 1281 } 1282 if (is_untranslated) 1283 { 1284 /* Change mp->msgstr_len consecutive empty strings into 1285 nplurals consecutive empty strings. */ 1286 if (nplurals > mp->msgstr_len) 1287 mp->msgstr = untranslated_plural_msgstr; 1288 mp->msgstr_len = nplurals; 1289 } 1290 } 1291 1292 message_list_append (resultmlp, mp); 1293 stats->missing++; 1294 } 1295 } 1296 } 1297 1298 free (search_results); 1299 1300 /* Now postprocess the problematic merges. This is needed because we 1301 want the result to pass the "msgfmt -c -v" check. */ 1302 { 1303 /* message_merge sets mp->used to 1 or 2, depending on the problem. 1304 Compute the bitwise OR of all these. */ 1305 int problematic = 0; 1306 1307 for (j = 0; j < resultmlp->nitems; j++) 1308 problematic |= resultmlp->item[j]->used; 1309 1310 if (problematic) 1311 { 1312 unsigned long int nplurals = 0; 1313 1314 if (problematic & 1) 1315 { 1316 /* Need to know nplurals of the result domain. */ 1317 message_ty *header_entry = 1318 message_list_search (resultmlp, NULL, ""); 1319 1320 nplurals = get_plural_count (header_entry 1321 ? header_entry->msgstr 1322 : NULL); 1323 } 1324 1325 for (j = 0; j < resultmlp->nitems; j++) 1326 { 1327 message_ty *mp = resultmlp->item[j]; 1328 1329 if ((mp->used & 1) && (nplurals > 0)) 1330 { 1331 /* ref->msgid_plural != NULL but def->msgid_plural == NULL. 1332 Use a copy of def->msgstr for each possible plural form. */ 1333 size_t new_msgstr_len; 1334 char *new_msgstr; 1335 char *p; 1336 unsigned long i; 1337 1338 if (verbosity_level > 1) 1339 { 1340 po_gram_error_at_line (&mp->pos, _("\ 1341this message should define plural forms")); 1342 } 1343 1344 new_msgstr_len = nplurals * mp->msgstr_len; 1345 new_msgstr = XNMALLOC (new_msgstr_len, char); 1346 for (i = 0, p = new_msgstr; i < nplurals; i++) 1347 { 1348 memcpy (p, mp->msgstr, mp->msgstr_len); 1349 p += mp->msgstr_len; 1350 } 1351 mp->msgstr = new_msgstr; 1352 mp->msgstr_len = new_msgstr_len; 1353 mp->is_fuzzy = true; 1354 } 1355 1356 if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1)) 1357 { 1358 /* ref->msgid_plural == NULL but def->msgid_plural != NULL. 1359 Use only the first among the plural forms. */ 1360 1361 if (verbosity_level > 1) 1362 { 1363 po_gram_error_at_line (&mp->pos, _("\ 1364this message should not define plural forms")); 1365 } 1366 1367 mp->msgstr_len = strlen (mp->msgstr) + 1; 1368 mp->is_fuzzy = true; 1369 } 1370 1371 /* Postprocessing of this message is done. */ 1372 mp->used = 0; 1373 } 1374 } 1375 } 1376} 1377 1378static msgdomain_list_ty * 1379merge (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax, 1380 msgdomain_list_ty **defp) 1381{ 1382 msgdomain_list_ty *def; 1383 msgdomain_list_ty *ref; 1384 size_t j, k; 1385 unsigned int processed; 1386 struct statistics stats; 1387 msgdomain_list_ty *result; 1388 definitions_ty definitions; 1389 message_list_ty *empty_list; 1390 1391 stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0; 1392 1393 /* This is the definitions file, created by a human. */ 1394 def = read_catalog_file (fn1, input_syntax); 1395 1396 /* This is the references file, created by groping the sources with 1397 the xgettext program. */ 1398 ref = read_catalog_file (fn2, input_syntax); 1399 /* Add a dummy header entry, if the references file contains none. */ 1400 for (k = 0; k < ref->nitems; k++) 1401 if (message_list_search (ref->item[k]->messages, NULL, "") == NULL) 1402 { 1403 static lex_pos_ty pos = { __FILE__, __LINE__ }; 1404 message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos); 1405 1406 message_list_prepend (ref->item[k]->messages, refheader); 1407 } 1408 1409 /* The references file can be either in ASCII or in UTF-8. If it is 1410 in UTF-8, we have to convert the definitions and the compendiums to 1411 UTF-8 as well. */ 1412 { 1413 bool was_utf8 = false; 1414 for (k = 0; k < ref->nitems; k++) 1415 { 1416 message_list_ty *mlp = ref->item[k]->messages; 1417 1418 for (j = 0; j < mlp->nitems; j++) 1419 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 1420 { 1421 const char *header = mlp->item[j]->msgstr; 1422 1423 if (header != NULL) 1424 { 1425 const char *charsetstr = c_strstr (header, "charset="); 1426 1427 if (charsetstr != NULL) 1428 { 1429 size_t len; 1430 1431 charsetstr += strlen ("charset="); 1432 len = strcspn (charsetstr, " \t\n"); 1433 if (len == strlen ("UTF-8") 1434 && c_strncasecmp (charsetstr, "UTF-8", len) == 0) 1435 was_utf8 = true; 1436 } 1437 } 1438 } 1439 } 1440 if (was_utf8) 1441 { 1442 def = iconv_msgdomain_list (def, "UTF-8", true, fn1); 1443 if (compendiums != NULL) 1444 for (k = 0; k < compendiums->nitems; k++) 1445 iconv_message_list (compendiums->item[k], NULL, po_charset_utf8, 1446 compendium_filenames->item[k]); 1447 } 1448 else if (compendiums != NULL && compendiums->nitems > 0) 1449 { 1450 /* Ensure that the definitions and the compendiums are in the same 1451 encoding. Prefer the encoding of the definitions file, if 1452 possible; otherwise, if the definitions file is empty and the 1453 compendiums are all in the same encoding, use that encoding; 1454 otherwise, use UTF-8. */ 1455 bool conversion_done = false; 1456 { 1457 char *charset = NULL; 1458 1459 /* Get the encoding of the definitions file. */ 1460 for (k = 0; k < def->nitems; k++) 1461 { 1462 message_list_ty *mlp = def->item[k]->messages; 1463 1464 for (j = 0; j < mlp->nitems; j++) 1465 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 1466 { 1467 const char *header = mlp->item[j]->msgstr; 1468 1469 if (header != NULL) 1470 { 1471 const char *charsetstr = c_strstr (header, "charset="); 1472 1473 if (charsetstr != NULL) 1474 { 1475 size_t len; 1476 1477 charsetstr += strlen ("charset="); 1478 len = strcspn (charsetstr, " \t\n"); 1479 charset = (char *) xmalloca (len + 1); 1480 memcpy (charset, charsetstr, len); 1481 charset[len] = '\0'; 1482 break; 1483 } 1484 } 1485 } 1486 if (charset != NULL) 1487 break; 1488 } 1489 if (charset != NULL) 1490 { 1491 const char *canon_charset = po_charset_canonicalize (charset); 1492 1493 if (canon_charset != NULL) 1494 { 1495 bool all_compendiums_iconvable = true; 1496 1497 if (compendiums != NULL) 1498 for (k = 0; k < compendiums->nitems; k++) 1499 if (!is_message_list_iconvable (compendiums->item[k], 1500 NULL, canon_charset)) 1501 { 1502 all_compendiums_iconvable = false; 1503 break; 1504 } 1505 1506 if (all_compendiums_iconvable) 1507 { 1508 /* Convert the compendiums to def's encoding. */ 1509 if (compendiums != NULL) 1510 for (k = 0; k < compendiums->nitems; k++) 1511 iconv_message_list (compendiums->item[k], 1512 NULL, canon_charset, 1513 compendium_filenames->item[k]); 1514 conversion_done = true; 1515 } 1516 } 1517 freea (charset); 1518 } 1519 } 1520 if (!conversion_done) 1521 { 1522 if (def->nitems == 0 1523 || (def->nitems == 1 && def->item[0]->messages->nitems == 0)) 1524 { 1525 /* The definitions file is empty. 1526 Compare the encodings of the compendiums. */ 1527 const char *common_canon_charset = NULL; 1528 1529 for (k = 0; k < compendiums->nitems; k++) 1530 { 1531 message_list_ty *mlp = compendiums->item[k]; 1532 char *charset = NULL; 1533 const char *canon_charset = NULL; 1534 1535 for (j = 0; j < mlp->nitems; j++) 1536 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete) 1537 { 1538 const char *header = mlp->item[j]->msgstr; 1539 1540 if (header != NULL) 1541 { 1542 const char *charsetstr = 1543 c_strstr (header, "charset="); 1544 1545 if (charsetstr != NULL) 1546 { 1547 size_t len; 1548 1549 charsetstr += strlen ("charset="); 1550 len = strcspn (charsetstr, " \t\n"); 1551 charset = (char *) xmalloca (len + 1); 1552 memcpy (charset, charsetstr, len); 1553 charset[len] = '\0'; 1554 1555 break; 1556 } 1557 } 1558 } 1559 if (charset != NULL) 1560 { 1561 canon_charset = po_charset_canonicalize (charset); 1562 freea (charset); 1563 } 1564 /* If no charset declaration was found in this file, 1565 or if it is not a valid encoding name, or if it 1566 differs from the common charset found so far, 1567 we have no common charset. */ 1568 if (canon_charset == NULL 1569 || (common_canon_charset != NULL 1570 && canon_charset != common_canon_charset)) 1571 { 1572 common_canon_charset = NULL; 1573 break; 1574 } 1575 common_canon_charset = canon_charset; 1576 } 1577 1578 if (common_canon_charset != NULL) 1579 /* No conversion needed in this case. */ 1580 conversion_done = true; 1581 } 1582 if (!conversion_done) 1583 { 1584 /* It's too hairy to find out what would be the optimal target 1585 encoding. So, convert everything to UTF-8. */ 1586 def = iconv_msgdomain_list (def, "UTF-8", true, fn1); 1587 if (compendiums != NULL) 1588 for (k = 0; k < compendiums->nitems; k++) 1589 iconv_message_list (compendiums->item[k], 1590 NULL, po_charset_utf8, 1591 compendium_filenames->item[k]); 1592 } 1593 } 1594 } 1595 } 1596 1597 /* Initialize and preprocess the total set of message definitions. */ 1598 definitions_init (&definitions, po_charset_utf8); 1599 empty_list = message_list_alloc (false); 1600 1601 result = msgdomain_list_alloc (false); 1602 processed = 0; 1603 1604 /* Every reference must be matched with its definition. */ 1605 if (!multi_domain_mode) 1606 for (k = 0; k < ref->nitems; k++) 1607 { 1608 const char *domain = ref->item[k]->domain; 1609 message_list_ty *refmlp = ref->item[k]->messages; 1610 message_list_ty *resultmlp = 1611 msgdomain_list_sublist (result, domain, true); 1612 message_list_ty *defmlp; 1613 1614 defmlp = msgdomain_list_sublist (def, domain, false); 1615 if (defmlp == NULL) 1616 defmlp = empty_list; 1617 definitions_set_current_list (&definitions, defmlp); 1618 1619 match_domain (fn1, fn2, &definitions, refmlp, resultmlp, 1620 &stats, &processed); 1621 } 1622 else 1623 { 1624 /* Apply the references messages in the default domain to each of 1625 the definition domains. */ 1626 message_list_ty *refmlp = ref->item[0]->messages; 1627 1628 for (k = 0; k < def->nitems; k++) 1629 { 1630 const char *domain = def->item[k]->domain; 1631 message_list_ty *defmlp = def->item[k]->messages; 1632 1633 /* Ignore the default message domain if it has no messages. */ 1634 if (k > 0 || defmlp->nitems > 0) 1635 { 1636 message_list_ty *resultmlp = 1637 msgdomain_list_sublist (result, domain, true); 1638 1639 definitions_set_current_list (&definitions, defmlp); 1640 1641 match_domain (fn1, fn2, &definitions, refmlp, resultmlp, 1642 &stats, &processed); 1643 } 1644 } 1645 } 1646 1647 definitions_destroy (&definitions); 1648 1649 /* Look for messages in the definition file, which are not present 1650 in the reference file, indicating messages which defined but not 1651 used in the program. Don't scan the compendium(s). */ 1652 for (k = 0; k < def->nitems; ++k) 1653 { 1654 const char *domain = def->item[k]->domain; 1655 message_list_ty *defmlp = def->item[k]->messages; 1656 1657 for (j = 0; j < defmlp->nitems; j++) 1658 { 1659 message_ty *defmsg = defmlp->item[j]; 1660 1661 if (!defmsg->used) 1662 { 1663 /* Remember the old translation although it is not used anymore. 1664 But we mark it as obsolete. */ 1665 message_ty *mp; 1666 1667 mp = message_copy (defmsg); 1668 /* Clear the extracted comments. */ 1669 if (mp->comment_dot != NULL) 1670 { 1671 string_list_free (mp->comment_dot); 1672 mp->comment_dot = NULL; 1673 } 1674 /* Clear the file position comments. */ 1675 if (mp->filepos != NULL) 1676 { 1677 size_t i; 1678 1679 for (i = 0; i < mp->filepos_count; i++) 1680 free ((char *) mp->filepos[i].file_name); 1681 mp->filepos_count = 0; 1682 free (mp->filepos); 1683 mp->filepos = NULL; 1684 } 1685 /* Mark as obsolete. */ 1686 mp->obsolete = true; 1687 1688 message_list_append (msgdomain_list_sublist (result, domain, true), 1689 mp); 1690 stats.obsolete++; 1691 } 1692 } 1693 } 1694 1695 /* Determine the known a-priori encoding, if any. */ 1696 if (def->encoding == ref->encoding) 1697 result->encoding = def->encoding; 1698 1699 /* Report some statistics. */ 1700 if (verbosity_level > 0) 1701 fprintf (stderr, _("%s\ 1702Read %ld old + %ld reference, \ 1703merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"), 1704 !quiet && verbosity_level <= 1 ? "\n" : "", 1705 (long) def->nitems, (long) ref->nitems, 1706 (long) stats.merged, (long) stats.fuzzied, (long) stats.missing, 1707 (long) stats.obsolete); 1708 else if (!quiet) 1709 fputs (_(" done.\n"), stderr); 1710 1711 /* Return results. */ 1712 *defp = def; 1713 return result; 1714} 1715