1/* Copyright (C) 2000-2009, 2011 Free Software Foundation, Inc. 2 This file is part of the GNU LIBICONV Library. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17#include "config.h" 18#ifndef ICONV_CONST 19# define ICONV_CONST 20#endif 21 22#include <limits.h> 23#include <stddef.h> 24#include <stdio.h> 25#include <stdlib.h> 26#include <string.h> 27#include <iconv.h> 28#include <errno.h> 29#include <locale.h> 30#include <fcntl.h> 31 32/* Ensure that iconv_no_i18n does not depend on libintl. */ 33#ifdef NO_I18N 34# undef ENABLE_NLS 35# undef ENABLE_RELOCATABLE 36#endif 37 38#include "binary-io.h" 39#include "progname.h" 40#include "relocatable.h" 41#include "safe-read.h" 42#include "xalloc.h" 43#include "uniwidth.h" 44#include "uniwidth/cjk.h" 45 46/* Ensure that iconv_no_i18n does not depend on libintl. */ 47#ifdef NO_I18N 48#include <stdarg.h> 49static void 50error (int status, int errnum, const char *message, ...) 51{ 52 va_list args; 53 54 fflush(stdout); 55 fprintf(stderr,"%s: ",program_name); 56 va_start(args,message); 57 vfprintf(stderr,message,args); 58 va_end(args); 59 if (errnum) { 60 const char *s = strerror(errnum); 61 if (s == NULL) 62 s = "Unknown system error"; 63 } 64 putc('\n',stderr); 65 fflush(stderr); 66 if (status) 67 exit(status); 68} 69#else 70# include "error.h" 71#endif 72 73#include "gettext.h" 74 75#define _(str) gettext(str) 76 77/* Ensure that iconv_no_i18n does not depend on libintl. */ 78#ifdef NO_I18N 79# define xmalloc malloc 80# define xalloc_die abort 81#endif 82 83/* Locale independent test for a decimal digit. 84 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 85 <ctype.h> isdigit must be an 'unsigned char'.) */ 86#undef isdigit 87#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 88 89/* Locale independent test for a printable character. 90 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 91 <ctype.h> isdigit must be an 'unsigned char'.) */ 92#define c_isprint(c) ((c) >= ' ' && (c) <= '~') 93 94/* ========================================================================= */ 95 96static int discard_unconvertible = 0; 97static int silent = 0; 98 99static void usage (int exitcode) 100{ 101 if (exitcode != 0) { 102 const char* helpstring1 = 103 /* TRANSLATORS: The first line of the short usage message. */ 104 _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]"); 105 const char* helpstring2 = 106 /* TRANSLATORS: The second line of the short usage message. 107 Align it correctly against the first line. */ 108 _("or: iconv -l"); 109 fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2); 110 fprintf(stderr, _("Try `%s --help' for more information.\n"), program_name); 111 } else { 112 /* xgettext: no-wrap */ 113 /* TRANSLATORS: The first line of the long usage message. 114 The %s placeholder expands to the program name. */ 115 printf(_("\ 116Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"), 117 program_name); 118 /* xgettext: no-wrap */ 119 /* TRANSLATORS: The second line of the long usage message. 120 Align it correctly against the first line. 121 The %s placeholder expands to the program name. */ 122 printf(_("\ 123or: %s -l\n"), 124 program_name); 125 printf("\n"); 126 /* xgettext: no-wrap */ 127 /* TRANSLATORS: Description of the iconv program. */ 128 printf(_("\ 129Converts text from one encoding to another encoding.\n")); 130 printf("\n"); 131 /* xgettext: no-wrap */ 132 printf(_("\ 133Options controlling the input and output format:\n")); 134 /* xgettext: no-wrap */ 135 printf(_("\ 136 -f ENCODING, --from-code=ENCODING\n\ 137 the encoding of the input\n")); 138 /* xgettext: no-wrap */ 139 printf(_("\ 140 -t ENCODING, --to-code=ENCODING\n\ 141 the encoding of the output\n")); 142 printf("\n"); 143 /* xgettext: no-wrap */ 144 printf(_("\ 145Options controlling conversion problems:\n")); 146 /* xgettext: no-wrap */ 147 printf(_("\ 148 -c discard unconvertible characters\n")); 149 /* xgettext: no-wrap */ 150 printf(_("\ 151 --unicode-subst=FORMATSTRING\n\ 152 substitution for unconvertible Unicode characters\n")); 153 /* xgettext: no-wrap */ 154 printf(_("\ 155 --byte-subst=FORMATSTRING substitution for unconvertible bytes\n")); 156 /* xgettext: no-wrap */ 157 printf(_("\ 158 --widechar-subst=FORMATSTRING\n\ 159 substitution for unconvertible wide characters\n")); 160 printf("\n"); 161 /* xgettext: no-wrap */ 162 printf(_("\ 163Options controlling error output:\n")); 164 /* xgettext: no-wrap */ 165 printf(_("\ 166 -s, --silent suppress error messages about conversion problems\n")); 167 printf("\n"); 168 /* xgettext: no-wrap */ 169 printf(_("\ 170Informative output:\n")); 171 /* xgettext: no-wrap */ 172 printf(_("\ 173 -l, --list list the supported encodings\n")); 174 /* xgettext: no-wrap */ 175 printf(_("\ 176 --help display this help and exit\n")); 177 /* xgettext: no-wrap */ 178 printf(_("\ 179 --version output version information and exit\n")); 180 printf("\n"); 181 /* TRANSLATORS: The placeholder indicates the bug-reporting address 182 for this package. Please add _another line_ saying 183 "Report translation bugs to <...>\n" with the address for translation 184 bugs (typically your translation team's web or email address). */ 185 fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout); 186 } 187 exit(exitcode); 188} 189 190static void print_version (void) 191{ 192 printf("iconv (GNU libiconv %d.%d)\n", 193 _libiconv_version >> 8, _libiconv_version & 0xff); 194 printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2011"); 195 /* xgettext: no-wrap */ 196 fputs (_("\ 197License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\ 198This is free software: you are free to change and redistribute it.\n\ 199There is NO WARRANTY, to the extent permitted by law.\n\ 200"),stdout); 201 /* TRANSLATORS: The %s placeholder expands to an author's name. */ 202 printf(_("Written by %s.\n"),"Bruno Haible"); 203 exit(EXIT_SUCCESS); 204} 205 206static int print_one (unsigned int namescount, const char * const * names, 207 void* data) 208{ 209 unsigned int i; 210 (void)data; 211 for (i = 0; i < namescount; i++) { 212 if (i > 0) 213 putc(' ',stdout); 214 fputs(names[i],stdout); 215 } 216 putc('\n',stdout); 217 return 0; 218} 219 220/* ========================================================================= */ 221 222/* Line number and column position. */ 223static unsigned int line; 224static unsigned int column; 225static const char* cjkcode; 226/* Update the line number and column position after a character was 227 successfully converted. */ 228static void update_line_column (unsigned int uc, void* data) 229{ 230 if (uc == 0x000A) { 231 line++; 232 column = 0; 233 } else { 234 int width = uc_width(uc, cjkcode); 235 if (width >= 0) 236 column += width; 237 else if (uc == 0x0009) 238 column += 8 - (column % 8); 239 } 240} 241 242/* ========================================================================= */ 243 244/* Production of placeholder strings as fallback for unconvertible 245 characters. */ 246 247/* Check that the argument is a format string taking either no argument 248 or exactly one unsigned integer argument. Returns the maximum output 249 size of the format string. */ 250static size_t check_subst_formatstring (const char *format, const char *param_name) 251{ 252 /* C format strings are described in POSIX (IEEE P1003.1 2001), section 253 XSH 3 fprintf(). See also Linux fprintf(3) manual page. 254 For simplicity, we don't accept 255 - the '%m$' reordering syntax, 256 - the 'I' flag, 257 - width specifications referring to an argument, 258 - precision specifications referring to an argument, 259 - size specifiers, 260 - format specifiers other than 'o', 'u', 'x', 'X'. 261 What remains? 262 A directive 263 - starts with '%', 264 - is optionally followed by any of the characters '#', '0', '-', ' ', 265 '+', "'", each of which acts as a flag, 266 - is optionally followed by a width specification: a nonempty digit 267 sequence, 268 - is optionally followed by '.' and a precision specification: a 269 nonempty digit sequence, 270 - is finished by a specifier 271 - '%', that needs no argument, 272 - 'o', 'u', 'x', 'X', that need an unsigned integer argument. 273 */ 274 size_t maxsize = 0; 275 unsigned int unnumbered_arg_count = 0; 276 277 for (; *format != '\0';) { 278 if (*format++ == '%') { 279 /* A directive. */ 280 unsigned int width = 0; 281 unsigned int precision = 0; 282 unsigned int length; 283 /* Parse flags. */ 284 for (;;) { 285 if (*format == ' ' || *format == '+' || *format == '-' 286 || *format == '#' || *format == '0' || *format == '\'') 287 format++; 288 else 289 break; 290 } 291 /* Parse width. */ 292 if (*format == '*') 293 error(EXIT_FAILURE,0, 294 /* TRANSLATORS: An error message. 295 The %s placeholder expands to a command-line option. */ 296 _("%s argument: A format directive with a variable width is not allowed here."), 297 param_name); 298 if (isdigit (*format)) { 299 do { 300 width = 10*width + (*format - '0'); 301 format++; 302 } while (isdigit (*format)); 303 } 304 /* Parse precision. */ 305 if (*format == '.') { 306 format++; 307 if (*format == '*') 308 error(EXIT_FAILURE,0, 309 /* TRANSLATORS: An error message. 310 The %s placeholder expands to a command-line option. */ 311 _("%s argument: A format directive with a variable precision is not allowed here."), 312 param_name); 313 if (isdigit (*format)) { 314 do { 315 precision = 10*precision + (*format - '0'); 316 format++; 317 } while (isdigit (*format)); 318 } 319 } 320 /* Parse size. */ 321 switch (*format) { 322 case 'h': case 'l': case 'L': case 'q': 323 case 'j': case 'z': case 'Z': case 't': 324 error(EXIT_FAILURE,0, 325 /* TRANSLATORS: An error message. 326 The %s placeholder expands to a command-line option. */ 327 _("%s argument: A format directive with a size is not allowed here."), 328 param_name); 329 } 330 /* Parse end of directive. */ 331 switch (*format) { 332 case '%': 333 length = 1; 334 break; 335 case 'u': case 'o': case 'x': case 'X': 336 if (*format == 'u') { 337 length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT 338 * 0.30103 /* binary -> decimal */ 339 ) 340 + 1; /* turn floor into ceil */ 341 if (length < precision) 342 length = precision; 343 length *= 2; /* estimate for FLAG_GROUP */ 344 length += 1; /* account for leading sign */ 345 } else if (*format == 'o') { 346 length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT 347 * 0.333334 /* binary -> octal */ 348 ) 349 + 1; /* turn floor into ceil */ 350 if (length < precision) 351 length = precision; 352 length += 1; /* account for leading sign */ 353 } else { /* 'x', 'X' */ 354 length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT 355 * 0.25 /* binary -> hexadecimal */ 356 ) 357 + 1; /* turn floor into ceil */ 358 if (length < precision) 359 length = precision; 360 length += 2; /* account for leading sign or alternate form */ 361 } 362 unnumbered_arg_count++; 363 break; 364 default: 365 if (*format == '\0') 366 error(EXIT_FAILURE,0, 367 /* TRANSLATORS: An error message. 368 The %s placeholder expands to a command-line option. */ 369 _("%s argument: The string ends in the middle of a directive."), 370 param_name); 371 else if (c_isprint(*format)) 372 error(EXIT_FAILURE,0, 373 /* TRANSLATORS: An error message. 374 The %s placeholder expands to a command-line option. 375 The %c placeholder expands to an unknown format directive. */ 376 _("%s argument: The character '%c' is not a valid conversion specifier."), 377 param_name,*format); 378 else 379 error(EXIT_FAILURE,0, 380 /* TRANSLATORS: An error message. 381 The %s placeholder expands to a command-line option. */ 382 _("%s argument: The character that terminates the format directive is not a valid conversion specifier."), 383 param_name); 384 abort(); /*NOTREACHED*/ 385 } 386 format++; 387 if (length < width) 388 length = width; 389 maxsize += length; 390 } else 391 maxsize++; 392 } 393 if (unnumbered_arg_count > 1) 394 error(EXIT_FAILURE,0, 395 /* TRANSLATORS: An error message. 396 The %s placeholder expands to a command-line option. 397 The %u placeholder expands to the number of arguments consumed by the format string. */ 398 ngettext("%s argument: The format string consumes more than one argument: %u argument.", 399 "%s argument: The format string consumes more than one argument: %u arguments.", 400 unnumbered_arg_count), 401 param_name,unnumbered_arg_count); 402 return maxsize; 403} 404 405/* Format strings. */ 406static const char* ilseq_byte_subst; 407static const char* ilseq_wchar_subst; 408static const char* ilseq_unicode_subst; 409 410/* Maximum result size for each format string. */ 411static size_t ilseq_byte_subst_size; 412static size_t ilseq_wchar_subst_size; 413static size_t ilseq_unicode_subst_size; 414 415/* Buffer of size ilseq_byte_subst_size+1. */ 416static char* ilseq_byte_subst_buffer; 417#if HAVE_WCHAR_T 418/* Buffer of size ilseq_wchar_subst_size+1. */ 419static char* ilseq_wchar_subst_buffer; 420#endif 421/* Buffer of size ilseq_unicode_subst_size+1. */ 422static char* ilseq_unicode_subst_buffer; 423 424/* Auxiliary variables for subst_mb_to_uc_fallback. */ 425/* Converter from locale encoding to UCS-4. */ 426static iconv_t subst_mb_to_uc_cd; 427/* Buffer of size ilseq_byte_subst_size. */ 428static unsigned int* subst_mb_to_uc_temp_buffer; 429 430static void subst_mb_to_uc_fallback 431 (const char* inbuf, size_t inbufsize, 432 void (*write_replacement) (const unsigned int *buf, size_t buflen, 433 void* callback_arg), 434 void* callback_arg, 435 void* data) 436{ 437 for (; inbufsize > 0; inbuf++, inbufsize--) { 438 const char* inptr; 439 size_t inbytesleft; 440 char* outptr; 441 size_t outbytesleft; 442 sprintf(ilseq_byte_subst_buffer, 443 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf); 444 inptr = ilseq_byte_subst_buffer; 445 inbytesleft = strlen(ilseq_byte_subst_buffer); 446 outptr = (char*)subst_mb_to_uc_temp_buffer; 447 outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int); 448 iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL); 449 if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 450 == (size_t)(-1) 451 || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft) 452 == (size_t)(-1)) 453 error(EXIT_FAILURE,0, 454 /* TRANSLATORS: An error message. 455 The %s placeholder expands to a piece of text, specified through --byte-subst. */ 456 _("cannot convert byte substitution to Unicode: %s"), 457 ilseq_byte_subst_buffer); 458 if (!(outbytesleft%sizeof(unsigned int) == 0)) 459 abort(); 460 write_replacement(subst_mb_to_uc_temp_buffer, 461 ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)), 462 callback_arg); 463 } 464} 465 466/* Auxiliary variables for subst_uc_to_mb_fallback. */ 467/* Converter from locale encoding to target encoding. */ 468static iconv_t subst_uc_to_mb_cd; 469/* Buffer of size ilseq_unicode_subst_size*4. */ 470static char* subst_uc_to_mb_temp_buffer; 471 472static void subst_uc_to_mb_fallback 473 (unsigned int code, 474 void (*write_replacement) (const char *buf, size_t buflen, 475 void* callback_arg), 476 void* callback_arg, 477 void* data) 478{ 479 const char* inptr; 480 size_t inbytesleft; 481 char* outptr; 482 size_t outbytesleft; 483 sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code); 484 inptr = ilseq_unicode_subst_buffer; 485 inbytesleft = strlen(ilseq_unicode_subst_buffer); 486 outptr = subst_uc_to_mb_temp_buffer; 487 outbytesleft = ilseq_unicode_subst_size*4; 488 iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL); 489 if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 490 == (size_t)(-1) 491 || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft) 492 == (size_t)(-1)) 493 error(EXIT_FAILURE,0, 494 /* TRANSLATORS: An error message. 495 The %s placeholder expands to a piece of text, specified through --unicode-subst. */ 496 _("cannot convert unicode substitution to target encoding: %s"), 497 ilseq_unicode_subst_buffer); 498 write_replacement(subst_uc_to_mb_temp_buffer, 499 ilseq_unicode_subst_size*4-outbytesleft, 500 callback_arg); 501} 502 503#if HAVE_WCHAR_T 504 505/* Auxiliary variables for subst_mb_to_wc_fallback. */ 506/* Converter from locale encoding to wchar_t. */ 507static iconv_t subst_mb_to_wc_cd; 508/* Buffer of size ilseq_byte_subst_size. */ 509static wchar_t* subst_mb_to_wc_temp_buffer; 510 511static void subst_mb_to_wc_fallback 512 (const char* inbuf, size_t inbufsize, 513 void (*write_replacement) (const wchar_t *buf, size_t buflen, 514 void* callback_arg), 515 void* callback_arg, 516 void* data) 517{ 518 for (; inbufsize > 0; inbuf++, inbufsize--) { 519 const char* inptr; 520 size_t inbytesleft; 521 char* outptr; 522 size_t outbytesleft; 523 sprintf(ilseq_byte_subst_buffer, 524 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf); 525 inptr = ilseq_byte_subst_buffer; 526 inbytesleft = strlen(ilseq_byte_subst_buffer); 527 outptr = (char*)subst_mb_to_wc_temp_buffer; 528 outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t); 529 iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL); 530 if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 531 == (size_t)(-1) 532 || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft) 533 == (size_t)(-1)) 534 error(EXIT_FAILURE,0, 535 /* TRANSLATORS: An error message. 536 The %s placeholder expands to a piece of text, specified through --byte-subst. */ 537 _("cannot convert byte substitution to wide string: %s"), 538 ilseq_byte_subst_buffer); 539 if (!(outbytesleft%sizeof(wchar_t) == 0)) 540 abort(); 541 write_replacement(subst_mb_to_wc_temp_buffer, 542 ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)), 543 callback_arg); 544 } 545} 546 547/* Auxiliary variables for subst_wc_to_mb_fallback. */ 548/* Converter from locale encoding to target encoding. */ 549static iconv_t subst_wc_to_mb_cd; 550/* Buffer of size ilseq_wchar_subst_size*4. 551 Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */ 552static char* subst_wc_to_mb_temp_buffer; 553 554static void subst_wc_to_mb_fallback 555 (wchar_t code, 556 void (*write_replacement) (const char *buf, size_t buflen, 557 void* callback_arg), 558 void* callback_arg, 559 void* data) 560{ 561 const char* inptr; 562 size_t inbytesleft; 563 char* outptr; 564 size_t outbytesleft; 565 sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code); 566 inptr = ilseq_wchar_subst_buffer; 567 inbytesleft = strlen(ilseq_wchar_subst_buffer); 568 outptr = subst_wc_to_mb_temp_buffer; 569 outbytesleft = ilseq_wchar_subst_size*4; 570 iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL); 571 if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 572 == (size_t)(-1) 573 || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft) 574 == (size_t)(-1)) 575 error(EXIT_FAILURE,0, 576 /* TRANSLATORS: An error message. 577 The %s placeholder expands to a piece of text, specified through --widechar-subst. */ 578 _("cannot convert widechar substitution to target encoding: %s"), 579 ilseq_wchar_subst_buffer); 580 write_replacement(subst_wc_to_mb_temp_buffer, 581 ilseq_wchar_subst_size*4-outbytesleft, 582 callback_arg); 583} 584 585#else 586 587#define subst_mb_to_wc_fallback NULL 588#define subst_wc_to_mb_fallback NULL 589 590#endif 591 592/* Auxiliary variables for subst_mb_to_mb_fallback. */ 593/* Converter from locale encoding to target encoding. */ 594static iconv_t subst_mb_to_mb_cd; 595/* Buffer of size ilseq_byte_subst_size*4. */ 596static char* subst_mb_to_mb_temp_buffer; 597 598static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize) 599{ 600 for (; inbufsize > 0; inbuf++, inbufsize--) { 601 const char* inptr; 602 size_t inbytesleft; 603 char* outptr; 604 size_t outbytesleft; 605 sprintf(ilseq_byte_subst_buffer, 606 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf); 607 inptr = ilseq_byte_subst_buffer; 608 inbytesleft = strlen(ilseq_byte_subst_buffer); 609 outptr = subst_mb_to_mb_temp_buffer; 610 outbytesleft = ilseq_byte_subst_size*4; 611 iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL); 612 if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 613 == (size_t)(-1) 614 || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft) 615 == (size_t)(-1)) 616 error(EXIT_FAILURE,0, 617 /* TRANSLATORS: An error message. 618 The %s placeholder expands to a piece of text, specified through --byte-subst. */ 619 _("cannot convert byte substitution to target encoding: %s"), 620 ilseq_byte_subst_buffer); 621 fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft, 622 stdout); 623 } 624} 625 626/* ========================================================================= */ 627 628/* Error messages during conversion. */ 629 630static void conversion_error_EILSEQ (const char* infilename) 631{ 632 fflush(stdout); 633 if (column > 0) 634 putc('\n',stderr); 635 error(0,0, 636 /* TRANSLATORS: An error message. 637 The placeholders expand to the input file name, a line number, and a column number. */ 638 _("%s:%u:%u: cannot convert"), 639 infilename,line,column); 640} 641 642static void conversion_error_EINVAL (const char* infilename) 643{ 644 fflush(stdout); 645 if (column > 0) 646 putc('\n',stderr); 647 error(0,0, 648 /* TRANSLATORS: An error message. 649 The placeholders expand to the input file name, a line number, and a column number. 650 A "shift sequence" is a sequence of bytes that changes the state of the converter; 651 this concept exists only for "stateful" encodings like ISO-2022-JP. */ 652 _("%s:%u:%u: incomplete character or shift sequence"), 653 infilename,line,column); 654} 655 656static void conversion_error_other (int errnum, const char* infilename) 657{ 658 fflush(stdout); 659 if (column > 0) 660 putc('\n',stderr); 661 error(0,errnum, 662 /* TRANSLATORS: The first part of an error message. 663 It is followed by a colon and a detail message. 664 The placeholders expand to the input file name, a line number, and a column number. */ 665 _("%s:%u:%u"), 666 infilename,line,column); 667} 668 669/* Convert the input given in infile. */ 670 671static int convert (iconv_t cd, int infile, const char* infilename) 672{ 673 char inbuf[4096+4096]; 674 size_t inbufrest = 0; 675 int infile_error = 0; 676 char initial_outbuf[4096]; 677 char *outbuf = initial_outbuf; 678 size_t outbufsize = sizeof(initial_outbuf); 679 int status = 0; 680 681#if O_BINARY 682 SET_BINARY(infile); 683#endif 684 line = 1; column = 0; 685 iconv(cd,NULL,NULL,NULL,NULL); 686 for (;;) { 687 size_t inbufsize; 688 /* Transfer the accumulated output to its destination, in case the 689 safe_read() call will block. */ 690 fflush(stdout); 691 inbufsize = safe_read(infile,inbuf+4096,4096); 692 if (inbufsize == 0 || inbufsize == SAFE_READ_ERROR) { 693 infile_error = (inbufsize == SAFE_READ_ERROR ? errno : 0); 694 if (inbufrest == 0) 695 break; 696 else { 697 if (ilseq_byte_subst != NULL) 698 subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest); 699 if (!silent) 700 conversion_error_EINVAL(infilename); 701 status = 1; 702 goto done; 703 } 704 } else { 705 const char* inptr = inbuf+4096-inbufrest; 706 size_t insize = inbufrest+inbufsize; 707 inbufrest = 0; 708 while (insize > 0) { 709 char* outptr = outbuf; 710 size_t outsize = outbufsize; 711 size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize); 712 if (outptr != outbuf) { 713 int saved_errno = errno; 714 if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) { 715 status = 1; 716 goto done; 717 } 718 errno = saved_errno; 719 } 720 if (res == (size_t)(-1)) { 721 if (errno == EILSEQ) { 722 if (discard_unconvertible == 1) { 723 int one = 1; 724 iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one); 725 discard_unconvertible = 2; 726 status = 1; 727 } else { 728 if (!silent) 729 conversion_error_EILSEQ(infilename); 730 status = 1; 731 goto done; 732 } 733 } else if (errno == EINVAL) { 734 if (inbufsize == 0 || insize > 4096) { 735 if (!silent) 736 conversion_error_EINVAL(infilename); 737 status = 1; 738 goto done; 739 } else { 740 inbufrest = insize; 741 if (insize > 0) { 742 /* Like memcpy(inbuf+4096-insize,inptr,insize), except that 743 we cannot use memcpy here, because source and destination 744 regions may overlap. */ 745 char* restptr = inbuf+4096-insize; 746 do { *restptr++ = *inptr++; } while (--insize > 0); 747 } 748 break; 749 } 750 } else if (errno == E2BIG) { 751 if (outptr==outbuf) { 752 /* outbuf is too small. Double its size. */ 753 if (outbuf != initial_outbuf) 754 free(outbuf); 755 outbufsize = 2*outbufsize; 756 if (outbufsize==0) /* integer overflow? */ 757 xalloc_die(); 758 outbuf = (char*)xmalloc(outbufsize); 759 } 760 } else { 761 if (!silent) 762 conversion_error_other(errno,infilename); 763 status = 1; 764 goto done; 765 } 766 } 767 } 768 } 769 } 770 for (;;) { 771 char* outptr = outbuf; 772 size_t outsize = outbufsize; 773 size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); 774 if (outptr != outbuf) { 775 int saved_errno = errno; 776 if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) { 777 status = 1; 778 goto done; 779 } 780 errno = saved_errno; 781 } 782 if (res == (size_t)(-1)) { 783 if (errno == EILSEQ) { 784 if (discard_unconvertible == 1) { 785 int one = 1; 786 iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one); 787 discard_unconvertible = 2; 788 status = 1; 789 } else { 790 if (!silent) 791 conversion_error_EILSEQ(infilename); 792 status = 1; 793 goto done; 794 } 795 } else if (errno == EINVAL) { 796 if (!silent) 797 conversion_error_EINVAL(infilename); 798 status = 1; 799 goto done; 800 } else if (errno == E2BIG) { 801 if (outptr==outbuf) { 802 /* outbuf is too small. Double its size. */ 803 if (outbuf != initial_outbuf) 804 free(outbuf); 805 outbufsize = 2*outbufsize; 806 if (outbufsize==0) /* integer overflow? */ 807 xalloc_die(); 808 outbuf = (char*)xmalloc(outbufsize); 809 } 810 } else { 811 if (!silent) 812 conversion_error_other(errno,infilename); 813 status = 1; 814 goto done; 815 } 816 } else 817 break; 818 } 819 if (infile_error) { 820 fflush(stdout); 821 if (column > 0) 822 putc('\n',stderr); 823 error(0,infile_error, 824 /* TRANSLATORS: An error message. 825 The placeholder expands to the input file name. */ 826 _("%s: I/O error"), 827 infilename); 828 status = 1; 829 goto done; 830 } 831 done: 832 if (outbuf != initial_outbuf) 833 free(outbuf); 834 return status; 835} 836 837/* ========================================================================= */ 838 839int main (int argc, char* argv[]) 840{ 841 const char* fromcode = NULL; 842 const char* tocode = NULL; 843 int do_list = 0; 844 iconv_t cd; 845 struct iconv_fallbacks fallbacks; 846 struct iconv_hooks hooks; 847 int i; 848 int status; 849 850 set_program_name (argv[0]); 851#if HAVE_SETLOCALE 852 /* Needed for the locale dependent encodings, "char" and "wchar_t", 853 and for gettext. */ 854 setlocale(LC_CTYPE,""); 855#if ENABLE_NLS 856 /* Needed for gettext. */ 857 setlocale(LC_MESSAGES,""); 858#endif 859#endif 860#if ENABLE_NLS 861 bindtextdomain("libiconv",relocate(LOCALEDIR)); 862#endif 863 textdomain("libiconv"); 864 for (i = 1; i < argc;) { 865 size_t len = strlen(argv[i]); 866 if (!strcmp(argv[i],"--")) { 867 i++; 868 break; 869 } 870 if (!strcmp(argv[i],"-f") 871 /* --f ... --from-code */ 872 || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len)) 873 /* --from-code=... */ 874 || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) { 875 if (len < 12) 876 if (i == argc-1) usage(1); 877 if (fromcode != NULL) usage(1); 878 if (len < 12) { 879 fromcode = argv[i+1]; 880 i += 2; 881 } else { 882 fromcode = argv[i]+12; 883 i++; 884 } 885 continue; 886 } 887 if (!strcmp(argv[i],"-t") 888 /* --t ... --to-code */ 889 || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len)) 890 /* --from-code=... */ 891 || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) { 892 if (len < 10) 893 if (i == argc-1) usage(1); 894 if (tocode != NULL) usage(1); 895 if (len < 10) { 896 tocode = argv[i+1]; 897 i += 2; 898 } else { 899 tocode = argv[i]+10; 900 i++; 901 } 902 continue; 903 } 904 if (!strcmp(argv[i],"-l") 905 /* --l ... --list */ 906 || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) { 907 do_list = 1; 908 i++; 909 continue; 910 } 911 if (/* --by ... --byte-subst */ 912 (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len)) 913 /* --byte-subst=... */ 914 || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) { 915 if (len < 13) { 916 if (i == argc-1) usage(1); 917 ilseq_byte_subst = argv[i+1]; 918 i += 2; 919 } else { 920 ilseq_byte_subst = argv[i]+13; 921 i++; 922 } 923 ilseq_byte_subst_size = 924 check_subst_formatstring(ilseq_byte_subst, "--byte-subst"); 925 continue; 926 } 927 if (/* --w ... --widechar-subst */ 928 (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len)) 929 /* --widechar-subst=... */ 930 || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) { 931 if (len < 17) { 932 if (i == argc-1) usage(1); 933 ilseq_wchar_subst = argv[i+1]; 934 i += 2; 935 } else { 936 ilseq_wchar_subst = argv[i]+17; 937 i++; 938 } 939 ilseq_wchar_subst_size = 940 check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst"); 941 continue; 942 } 943 if (/* --u ... --unicode-subst */ 944 (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len)) 945 /* --unicode-subst=... */ 946 || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) { 947 if (len < 16) { 948 if (i == argc-1) usage(1); 949 ilseq_unicode_subst = argv[i+1]; 950 i += 2; 951 } else { 952 ilseq_unicode_subst = argv[i]+16; 953 i++; 954 } 955 ilseq_unicode_subst_size = 956 check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst"); 957 continue; 958 } 959 if /* --s ... --silent */ 960 (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) { 961 silent = 1; 962 continue; 963 } 964 if /* --h ... --help */ 965 (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) { 966 usage(0); 967 } 968 if /* --v ... --version */ 969 (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) { 970 print_version(); 971 } 972#if O_BINARY 973 /* Backward compatibility with iconv <= 1.9.1. */ 974 if /* --bi ... --binary */ 975 (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) { 976 i++; 977 continue; 978 } 979#endif 980 if (argv[i][0] == '-') { 981 const char *option = argv[i] + 1; 982 if (*option == '\0') 983 usage(1); 984 for (; *option; option++) 985 switch (*option) { 986 case 'c': discard_unconvertible = 1; break; 987 case 's': silent = 1; break; 988 default: usage(1); 989 } 990 i++; 991 continue; 992 } 993 break; 994 } 995 if (do_list) { 996 if (i != 2 || i != argc) 997 usage(1); 998 iconvlist(print_one,NULL); 999 status = 0; 1000 } else { 1001#if O_BINARY 1002 SET_BINARY(fileno(stdout)); 1003#endif 1004 if (fromcode == NULL) 1005 fromcode = "char"; 1006 if (tocode == NULL) 1007 tocode = "char"; 1008 cd = iconv_open(tocode,fromcode); 1009 if (cd == (iconv_t)(-1)) { 1010 if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1)) 1011 error(0,0, 1012 /* TRANSLATORS: An error message. 1013 The placeholder expands to the encoding name, specified through --from-code. */ 1014 _("conversion from %s unsupported"), 1015 fromcode); 1016 else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1)) 1017 error(0,0, 1018 /* TRANSLATORS: An error message. 1019 The placeholder expands to the encoding name, specified through --to-code. */ 1020 _("conversion to %s unsupported"), 1021 tocode); 1022 else 1023 error(0,0, 1024 /* TRANSLATORS: An error message. 1025 The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively. */ 1026 _("conversion from %s to %s unsupported"), 1027 fromcode,tocode); 1028 error(EXIT_FAILURE,0, 1029 /* TRANSLATORS: Additional advice after an error message. 1030 The %s placeholder expands to the program name. */ 1031 _("try '%s -l' to get the list of supported encodings"), 1032 program_name); 1033 } 1034 /* Look at fromcode and tocode, to determine whether character widths 1035 should be determined according to legacy CJK conventions. */ 1036 cjkcode = iconv_canonicalize(tocode); 1037 if (!is_cjk_encoding(cjkcode)) 1038 cjkcode = iconv_canonicalize(fromcode); 1039 /* Set up fallback routines for handling impossible conversions. */ 1040 if (ilseq_byte_subst != NULL) 1041 ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char)); 1042 if (!discard_unconvertible) { 1043 #if HAVE_WCHAR_T 1044 if (ilseq_wchar_subst != NULL) 1045 ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char)); 1046 #endif 1047 if (ilseq_unicode_subst != NULL) 1048 ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char)); 1049 if (ilseq_byte_subst != NULL) { 1050 subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char"); 1051 subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int)); 1052 #if HAVE_WCHAR_T 1053 subst_mb_to_wc_cd = iconv_open("wchar_t","char"); 1054 subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t)); 1055 #endif 1056 subst_mb_to_mb_cd = iconv_open(tocode,"char"); 1057 subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4); 1058 } 1059 #if HAVE_WCHAR_T 1060 if (ilseq_wchar_subst != NULL) { 1061 subst_wc_to_mb_cd = iconv_open(tocode,"char"); 1062 subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4); 1063 } 1064 #endif 1065 if (ilseq_unicode_subst != NULL) { 1066 subst_uc_to_mb_cd = iconv_open(tocode,"char"); 1067 subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4); 1068 } 1069 fallbacks.mb_to_uc_fallback = 1070 (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL); 1071 fallbacks.uc_to_mb_fallback = 1072 (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL); 1073 fallbacks.mb_to_wc_fallback = 1074 (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL); 1075 fallbacks.wc_to_mb_fallback = 1076 (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL); 1077 fallbacks.data = NULL; 1078 iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks); 1079 } 1080 /* Set up hooks for updating the line and column position. */ 1081 hooks.uc_hook = update_line_column; 1082 hooks.wc_hook = NULL; 1083 hooks.data = NULL; 1084 iconvctl(cd, ICONV_SET_HOOKS, &hooks); 1085 if (i == argc) 1086 status = convert(cd,fileno(stdin), 1087 /* TRANSLATORS: A filename substitute denoting standard input. */ 1088 _("(stdin)")); 1089 else { 1090 status = 0; 1091 for (; i < argc; i++) { 1092 const char* infilename = argv[i]; 1093 FILE* infile = fopen(infilename,"r"); 1094 if (infile == NULL) { 1095 int saved_errno = errno; 1096 error(0,saved_errno, 1097 /* TRANSLATORS: The first part of an error message. 1098 It is followed by a colon and a detail message. 1099 The %s placeholder expands to the input file name. */ 1100 _("%s"), 1101 infilename); 1102 status = 1; 1103 } else { 1104 status |= convert(cd,fileno(infile),infilename); 1105 fclose(infile); 1106 } 1107 } 1108 } 1109 iconv_close(cd); 1110 } 1111 if (ferror(stdout) || fclose(stdout)) { 1112 error(0,0, 1113 /* TRANSLATORS: An error message. */ 1114 _("I/O error")); 1115 status = 1; 1116 } 1117 exit(status); 1118} 1119