1/* Copyright (C) 2000-2006 Free Software Foundation, Inc. 2 This file is part of the GNU LIBICONV Library. 3 4 The GNU LIBICONV Library is free software; you can redistribute it 5 and/or modify it under the terms of the GNU Library General Public 6 License as published by the Free Software Foundation; either version 2 7 of the License, or (at your option) any later version. 8 9 The GNU LIBICONV Library is distributed in the hope that it will be 10 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Library General Public License for more details. 13 14 You should have received a copy of the GNU Library General Public 15 License along with the GNU LIBICONV Library; see the file COPYING.LIB. 16 If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 17 Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19#include "config.h" 20#ifndef ICONV_CONST 21# define ICONV_CONST const 22#endif 23 24#include <limits.h> 25#include <stddef.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29#include <iconv.h> 30#include <errno.h> 31#if HAVE_LOCALE_H 32#include <locale.h> 33#endif 34#include <fcntl.h> 35 36/* Ensure that iconv_no_i18n does not depend on libintl. */ 37#ifdef NO_I18N 38# undef ENABLE_NLS 39# undef ENABLE_RELOCATABLE 40#endif 41 42#include "binary-io.h" 43#include "exit.h" 44#include "progname.h" 45#include "relocatable.h" 46#include "xalloc.h" 47#include "uniwidth.h" 48#include "cjk.h" 49 50/* Ensure that iconv_no_i18n does not depend on libintl. */ 51#ifdef NO_I18N 52#include <stdarg.h> 53static void 54error (int status, int errnum, const char *message, ...) 55{ 56 va_list args; 57 58 fflush(stdout); 59 fprintf(stderr,"%s: ",program_name); 60 va_start(args,message); 61 vfprintf(stderr,message,args); 62 va_end(args); 63 if (errnum) { 64 const char *s = strerror(errnum); 65 if (s == NULL) 66 s = "Unknown system error"; 67 } 68 putc('\n',stderr); 69 fflush(stderr); 70 if (status) 71 exit(status); 72} 73#else 74# include "error.h" 75#endif 76 77#include "gettext.h" 78 79#define _(str) gettext(str) 80 81/* Ensure that iconv_no_i18n does not depend on libintl. */ 82#ifdef NO_I18N 83# define xmalloc malloc 84# define xalloc_die abort 85#endif 86 87/* Locale independent test for a decimal digit. 88 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 89 <ctype.h> isdigit must be an 'unsigned char'.) */ 90#undef isdigit 91#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 92 93/* Locale independent test for a printable character. 94 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 95 <ctype.h> isdigit must be an 'unsigned char'.) */ 96#define c_isprint(c) ((c) >= ' ' && (c) <= '~') 97 98/* ========================================================================= */ 99 100static int discard_unconvertible = 0; 101static int silent = 0; 102 103static void usage (int exitcode) 104{ 105 if (exitcode != 0) { 106 const char* helpstring1 = 107 _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]"); 108 const char* helpstring2 = 109 _("or: iconv -l"); 110 fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2); 111 fprintf(stderr, _("Try `%s --help' for more information.\n"), program_name); 112 } else { 113 /* xgettext: no-wrap */ 114 printf(_("\ 115Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"), 116 program_name); 117 /* xgettext: no-wrap */ 118 printf(_("\ 119or: %s -l\n"), 120 program_name); 121 printf("\n"); 122 /* xgettext: no-wrap */ 123 printf(_("\ 124Converts text from one encoding to another encoding.\n")); 125 printf("\n"); 126 /* xgettext: no-wrap */ 127 printf(_("\ 128Options controlling the input and output format:\n")); 129 /* xgettext: no-wrap */ 130 printf(_("\ 131 -f ENCODING, --from-code=ENCODING\n\ 132 the encoding of the input\n")); 133 /* xgettext: no-wrap */ 134 printf(_("\ 135 -t ENCODING, --to-code=ENCODING\n\ 136 the encoding of the output\n")); 137 printf("\n"); 138 /* xgettext: no-wrap */ 139 printf(_("\ 140Options controlling conversion problems:\n")); 141 /* xgettext: no-wrap */ 142 printf(_("\ 143 -c discard unconvertible characters\n")); 144 /* xgettext: no-wrap */ 145 printf(_("\ 146 --unicode-subst=FORMATSTRING\n\ 147 substitution for unconvertible Unicode characters\n")); 148 /* xgettext: no-wrap */ 149 printf(_("\ 150 --byte-subst=FORMATSTRING substitution for unconvertible bytes\n")); 151 /* xgettext: no-wrap */ 152 printf(_("\ 153 --widechar-subst=FORMATSTRING\n\ 154 substitution for unconvertible wide characters\n")); 155 printf("\n"); 156 /* xgettext: no-wrap */ 157 printf(_("\ 158Options controlling error output:\n")); 159 /* xgettext: no-wrap */ 160 printf(_("\ 161 -s, --silent suppress error messages about conversion problems\n")); 162 printf("\n"); 163 /* xgettext: no-wrap */ 164 printf(_("\ 165Informative output:\n")); 166 /* xgettext: no-wrap */ 167 printf(_("\ 168 -l, --list list the supported encodings\n")); 169 /* xgettext: no-wrap */ 170 printf(_("\ 171 --help display this help and exit\n")); 172 /* xgettext: no-wrap */ 173 printf(_("\ 174 --version output version information and exit\n")); 175 printf("\n"); 176 fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout); 177 } 178 exit(exitcode); 179} 180 181static void print_version (void) 182{ 183 printf("iconv (GNU libiconv %d.%d)\n", 184 _libiconv_version >> 8, _libiconv_version & 0xff); 185 printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2006"); 186 printf(_("\ 187This is free software; see the source for copying conditions. There is NO\n\ 188warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")); 189 printf(_("Written by %s.\n"),"Bruno Haible"); 190 exit(EXIT_SUCCESS); 191} 192 193static int print_one (unsigned int namescount, const char * const * names, 194 void* data) 195{ 196 unsigned int i; 197 (void)data; 198 for (i = 0; i < namescount; i++) { 199 if (i > 0) 200 putc(' ',stdout); 201 fputs(names[i],stdout); 202 } 203 putc('\n',stdout); 204 return 0; 205} 206 207/* ========================================================================= */ 208 209/* Line number and column position. */ 210static unsigned int line; 211static unsigned int column; 212static const char* cjkcode; 213/* Update the line number and column position after a character was 214 successfully converted. */ 215static void update_line_column (unsigned int uc, void* data) 216{ 217 if (uc == 0x000A) { 218 line++; 219 column = 0; 220 } else { 221 int width = uc_width(uc, cjkcode); 222 if (width >= 0) 223 column += width; 224 else if (uc == 0x0009) 225 column += 8 - (column % 8); 226 } 227} 228 229/* ========================================================================= */ 230 231/* Production of placeholder strings as fallback for unconvertible 232 characters. */ 233 234/* Check that the argument is a format string taking either no argument 235 or exactly one unsigned integer argument. Returns the maximum output 236 size of the format string. */ 237static size_t check_subst_formatstring (const char *format, const char *param_name) 238{ 239 /* C format strings are described in POSIX (IEEE P1003.1 2001), section 240 XSH 3 fprintf(). See also Linux fprintf(3) manual page. 241 For simplicity, we don't accept 242 - the '%m$' reordering syntax, 243 - the 'I' flag, 244 - width specifications referring to an argument, 245 - precision specifications referring to an argument, 246 - size specifiers, 247 - format specifiers other than 'o', 'u', 'x', 'X'. 248 What remains? 249 A directive 250 - starts with '%', 251 - is optionally followed by any of the characters '#', '0', '-', ' ', 252 '+', "'", each of which acts as a flag, 253 - is optionally followed by a width specification: a nonempty digit 254 sequence, 255 - is optionally followed by '.' and a precision specification: a 256 nonempty digit sequence, 257 - is finished by a specifier 258 - '%', that needs no argument, 259 - 'o', 'u', 'x', 'X', that need an unsigned integer argument. 260 */ 261 size_t maxsize = 0; 262 unsigned int unnumbered_arg_count = 0; 263 264 for (; *format != '\0';) { 265 if (*format++ == '%') { 266 /* A directive. */ 267 unsigned int width = 0; 268 unsigned int precision = 0; 269 unsigned int length; 270 /* Parse flags. */ 271 for (;;) { 272 if (*format == ' ' || *format == '+' || *format == '-' 273 || *format == '#' || *format == '0' || *format == '\'') 274 format++; 275 else 276 break; 277 } 278 /* Parse width. */ 279 if (*format == '*') 280 error(EXIT_FAILURE,0,_("%s argument: A format directive with a variable width is not allowed here."),param_name); 281 if (isdigit (*format)) { 282 do { 283 width = 10*width + (*format - '0'); 284 format++; 285 } while (isdigit (*format)); 286 } 287 /* Parse precision. */ 288 if (*format == '.') { 289 format++; 290 if (*format == '*') 291 error(EXIT_FAILURE,0,_("%s argument: A format directive with a variable precision is not allowed here."),param_name); 292 if (isdigit (*format)) { 293 do { 294 precision = 10*precision + (*format - '0'); 295 format++; 296 } while (isdigit (*format)); 297 } 298 } 299 /* Parse size. */ 300 switch (*format) { 301 case 'h': case 'l': case 'L': case 'q': 302 case 'j': case 'z': case 'Z': case 't': 303 error(EXIT_FAILURE,0,_("%s argument: A format directive with a size is not allowed here."),param_name); 304 } 305 /* Parse end of directive. */ 306 switch (*format) { 307 case '%': 308 length = 1; 309 break; 310 case 'u': case 'o': case 'x': case 'X': 311 if (*format == 'u') { 312 length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT 313 * 0.30103 /* binary -> decimal */ 314 ) 315 + 1; /* turn floor into ceil */ 316 if (length < precision) 317 length = precision; 318 length *= 2; /* estimate for FLAG_GROUP */ 319 length += 1; /* account for leading sign */ 320 } else if (*format == 'o') { 321 length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT 322 * 0.333334 /* binary -> octal */ 323 ) 324 + 1; /* turn floor into ceil */ 325 if (length < precision) 326 length = precision; 327 length += 1; /* account for leading sign */ 328 } else { /* 'x', 'X' */ 329 length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT 330 * 0.25 /* binary -> hexadecimal */ 331 ) 332 + 1; /* turn floor into ceil */ 333 if (length < precision) 334 length = precision; 335 length += 2; /* account for leading sign or alternate form */ 336 } 337 unnumbered_arg_count++; 338 break; 339 default: 340 if (*format == '\0') 341 error(EXIT_FAILURE,0,_("%s argument: The string ends in the middle of a directive."),param_name); 342 else if (c_isprint(*format)) 343 error(EXIT_FAILURE,0,_("%s argument: The character '%c' is not a valid conversion specifier."),param_name,*format); 344 else 345 error(EXIT_FAILURE,0,_("%s argument: The character that terminates the format directive is not a valid conversion specifier."),param_name); 346 abort(); /*NOTREACHED*/ 347 } 348 format++; 349 if (length < width) 350 length = width; 351 maxsize += length; 352 } else 353 maxsize++; 354 } 355 if (unnumbered_arg_count > 1) 356 error(EXIT_FAILURE,0,ngettext("%s argument: The format string consumes more than one argument: %u argument.", 357 "%s argument: The format string consumes more than one argument: %u arguments.", 358 unnumbered_arg_count), 359 param_name,unnumbered_arg_count); 360 return maxsize; 361} 362 363/* Format strings. */ 364static const char* ilseq_byte_subst; 365static const char* ilseq_wchar_subst; 366static const char* ilseq_unicode_subst; 367 368/* Maximum result size for each format string. */ 369static size_t ilseq_byte_subst_size; 370static size_t ilseq_wchar_subst_size; 371static size_t ilseq_unicode_subst_size; 372 373/* Buffer of size ilseq_byte_subst_size+1. */ 374static char* ilseq_byte_subst_buffer; 375#if HAVE_WCHAR_T 376/* Buffer of size ilseq_wchar_subst_size+1. */ 377static char* ilseq_wchar_subst_buffer; 378#endif 379/* Buffer of size ilseq_unicode_subst_size+1. */ 380static char* ilseq_unicode_subst_buffer; 381 382/* Auxiliary variables for subst_mb_to_uc_fallback. */ 383/* Converter from locale encoding to UCS-4. */ 384static iconv_t subst_mb_to_uc_cd; 385/* Buffer of size ilseq_byte_subst_size. */ 386static unsigned int* subst_mb_to_uc_temp_buffer; 387 388static void subst_mb_to_uc_fallback 389 (const char* inbuf, size_t inbufsize, 390 void (*write_replacement) (const unsigned int *buf, size_t buflen, 391 void* callback_arg), 392 void* callback_arg, 393 void* data) 394{ 395 for (; inbufsize > 0; inbuf++, inbufsize--) { 396 const char* inptr; 397 size_t inbytesleft; 398 char* outptr; 399 size_t outbytesleft; 400 sprintf(ilseq_byte_subst_buffer, 401 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf); 402 inptr = ilseq_byte_subst_buffer; 403 inbytesleft = strlen(ilseq_byte_subst_buffer); 404 outptr = (char*)subst_mb_to_uc_temp_buffer; 405 outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int); 406 iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL); 407 if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 408 == (size_t)(-1) 409 || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft) 410 == (size_t)(-1)) 411 error(EXIT_FAILURE,0,_("cannot convert byte substitution to Unicode: %s"),ilseq_byte_subst_buffer); 412 if (!(outbytesleft%sizeof(unsigned int) == 0)) 413 abort(); 414 write_replacement(subst_mb_to_uc_temp_buffer, 415 ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)), 416 callback_arg); 417 } 418} 419 420/* Auxiliary variables for subst_uc_to_mb_fallback. */ 421/* Converter from locale encoding to target encoding. */ 422static iconv_t subst_uc_to_mb_cd; 423/* Buffer of size ilseq_unicode_subst_size*4. */ 424static char* subst_uc_to_mb_temp_buffer; 425 426static void subst_uc_to_mb_fallback 427 (unsigned int code, 428 void (*write_replacement) (const char *buf, size_t buflen, 429 void* callback_arg), 430 void* callback_arg, 431 void* data) 432{ 433 const char* inptr; 434 size_t inbytesleft; 435 char* outptr; 436 size_t outbytesleft; 437 sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code); 438 inptr = ilseq_unicode_subst_buffer; 439 inbytesleft = strlen(ilseq_unicode_subst_buffer); 440 outptr = subst_uc_to_mb_temp_buffer; 441 outbytesleft = ilseq_unicode_subst_size*4; 442 iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL); 443 if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 444 == (size_t)(-1) 445 || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft) 446 == (size_t)(-1)) 447 error(EXIT_FAILURE,0,_("cannot convert unicode substitution to target encoding: %s"),ilseq_unicode_subst_buffer); 448 write_replacement(subst_uc_to_mb_temp_buffer, 449 ilseq_unicode_subst_size*4-outbytesleft, 450 callback_arg); 451} 452 453#if HAVE_WCHAR_T 454 455/* Auxiliary variables for subst_mb_to_wc_fallback. */ 456/* Converter from locale encoding to wchar_t. */ 457static iconv_t subst_mb_to_wc_cd; 458/* Buffer of size ilseq_byte_subst_size. */ 459static wchar_t* subst_mb_to_wc_temp_buffer; 460 461static void subst_mb_to_wc_fallback 462 (const char* inbuf, size_t inbufsize, 463 void (*write_replacement) (const wchar_t *buf, size_t buflen, 464 void* callback_arg), 465 void* callback_arg, 466 void* data) 467{ 468 for (; inbufsize > 0; inbuf++, inbufsize--) { 469 const char* inptr; 470 size_t inbytesleft; 471 char* outptr; 472 size_t outbytesleft; 473 sprintf(ilseq_byte_subst_buffer, 474 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf); 475 inptr = ilseq_byte_subst_buffer; 476 inbytesleft = strlen(ilseq_byte_subst_buffer); 477 outptr = (char*)subst_mb_to_wc_temp_buffer; 478 outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t); 479 iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL); 480 if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 481 == (size_t)(-1) 482 || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft) 483 == (size_t)(-1)) 484 error(EXIT_FAILURE,0,_("cannot convert byte substitution to wide string: %s"),ilseq_byte_subst_buffer); 485 if (!(outbytesleft%sizeof(wchar_t) == 0)) 486 abort(); 487 write_replacement(subst_mb_to_wc_temp_buffer, 488 ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)), 489 callback_arg); 490 } 491} 492 493/* Auxiliary variables for subst_wc_to_mb_fallback. */ 494/* Converter from locale encoding to target encoding. */ 495static iconv_t subst_wc_to_mb_cd; 496/* Buffer of size ilseq_wchar_subst_size*4. 497 Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */ 498static char* subst_wc_to_mb_temp_buffer; 499 500static void subst_wc_to_mb_fallback 501 (wchar_t code, 502 void (*write_replacement) (const char *buf, size_t buflen, 503 void* callback_arg), 504 void* callback_arg, 505 void* data) 506{ 507 const char* inptr; 508 size_t inbytesleft; 509 char* outptr; 510 size_t outbytesleft; 511 sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code); 512 inptr = ilseq_wchar_subst_buffer; 513 inbytesleft = strlen(ilseq_wchar_subst_buffer); 514 outptr = subst_wc_to_mb_temp_buffer; 515 outbytesleft = ilseq_wchar_subst_size*4; 516 iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL); 517 if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 518 == (size_t)(-1) 519 || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft) 520 == (size_t)(-1)) 521 error(EXIT_FAILURE,0,_("cannot convert widechar substitution to target encoding: %s"),ilseq_wchar_subst_buffer); 522 write_replacement(subst_wc_to_mb_temp_buffer, 523 ilseq_wchar_subst_size*4-outbytesleft, 524 callback_arg); 525} 526 527#else 528 529#define subst_mb_to_wc_fallback NULL 530#define subst_wc_to_mb_fallback NULL 531 532#endif 533 534/* Auxiliary variables for subst_mb_to_mb_fallback. */ 535/* Converter from locale encoding to target encoding. */ 536static iconv_t subst_mb_to_mb_cd; 537/* Buffer of size ilseq_byte_subst_size*4. */ 538static char* subst_mb_to_mb_temp_buffer; 539 540static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize) 541{ 542 for (; inbufsize > 0; inbuf++, inbufsize--) { 543 const char* inptr; 544 size_t inbytesleft; 545 char* outptr; 546 size_t outbytesleft; 547 sprintf(ilseq_byte_subst_buffer, 548 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf); 549 inptr = ilseq_byte_subst_buffer; 550 inbytesleft = strlen(ilseq_byte_subst_buffer); 551 outptr = subst_mb_to_mb_temp_buffer; 552 outbytesleft = ilseq_byte_subst_size*4; 553 iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL); 554 if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft) 555 == (size_t)(-1) 556 || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft) 557 == (size_t)(-1)) 558 error(EXIT_FAILURE,0,_("cannot convert byte substitution to target encoding: %s"),ilseq_byte_subst_buffer); 559 fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft, 560 stdout); 561 } 562} 563 564/* ========================================================================= */ 565 566static int convert (iconv_t cd, FILE* infile, const char* infilename) 567{ 568 char inbuf[4096+4096]; 569 size_t inbufrest = 0; 570 char initial_outbuf[4096]; 571 char *outbuf = initial_outbuf; 572 size_t outbufsize = sizeof(initial_outbuf); 573 int status = 0; 574 575#if O_BINARY 576 SET_BINARY(fileno(infile)); 577#endif 578 line = 1; column = 0; 579 iconv(cd,NULL,NULL,NULL,NULL); 580 for (;;) { 581 size_t inbufsize = fread(inbuf+4096,1,4096,infile); 582 if (inbufsize == 0) { 583 if (inbufrest == 0) 584 break; 585 else { 586 if (ilseq_byte_subst != NULL) 587 subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest); 588 if (!silent) { 589 fflush(stdout); 590 if (column > 0) 591 putc('\n',stderr); 592 error(0,0,_("%s:%u:%u: incomplete character or shift sequence"),infilename,line,column); 593 } 594 status = 1; 595 goto done; 596 } 597 } else { 598 const char* inptr = inbuf+4096-inbufrest; 599 size_t insize = inbufrest+inbufsize; 600 inbufrest = 0; 601 while (insize > 0) { 602 char* outptr = outbuf; 603 size_t outsize = outbufsize; 604 size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize); 605 if (outptr != outbuf) { 606 int saved_errno = errno; 607 if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) { 608 status = 1; 609 goto done; 610 } 611 errno = saved_errno; 612 } 613 if (res == (size_t)(-1)) { 614 if (errno == EILSEQ) { 615 if (discard_unconvertible == 1) { 616 int one = 1; 617 iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one); 618 discard_unconvertible = 2; 619 status = 1; 620 } else { 621 if (!silent) { 622 fflush(stdout); 623 if (column > 0) 624 putc('\n',stderr); 625 error(0,0,_("%s:%u:%u: cannot convert"),infilename,line,column); 626 } 627 status = 1; 628 goto done; 629 } 630 } else if (errno == EINVAL) { 631 if (inbufsize == 0 || insize > 4096) { 632 if (!silent) { 633 fflush(stdout); 634 if (column > 0) 635 putc('\n',stderr); 636 error(0,0,_("%s:%u:%u: incomplete character or shift sequence"),infilename,line,column); 637 } 638 status = 1; 639 goto done; 640 } else { 641 inbufrest = insize; 642 if (insize > 0) { 643 /* Like memcpy(inbuf+4096-insize,inptr,insize), except that 644 we cannot use memcpy here, because source and destination 645 regions may overlap. */ 646 char* restptr = inbuf+4096-insize; 647 do { *restptr++ = *inptr++; } while (--insize > 0); 648 } 649 break; 650 } 651 } else if (errno == E2BIG) { 652 if (outptr==outbuf) { 653 /* outbuf is too small. Double its size. */ 654 if (outbuf != initial_outbuf) 655 free(outbuf); 656 outbufsize = 2*outbufsize; 657 if (outbufsize==0) /* integer overflow? */ 658 xalloc_die(); 659 outbuf = (char*)xmalloc(outbufsize); 660 } 661 } else { 662 if (!silent) { 663 int saved_errno = errno; 664 fflush(stdout); 665 if (column > 0) 666 putc('\n',stderr); 667 error(0,saved_errno,_("%s:%u:%u"),infilename,line,column); 668 } 669 status = 1; 670 goto done; 671 } 672 } 673 } 674 } 675 } 676 for (;;) { 677 char* outptr = outbuf; 678 size_t outsize = outbufsize; 679 size_t res = iconv(cd,NULL,NULL,&outptr,&outsize); 680 if (outptr != outbuf) { 681 int saved_errno = errno; 682 if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) { 683 status = 1; 684 goto done; 685 } 686 errno = saved_errno; 687 } 688 if (res == (size_t)(-1)) { 689 if (errno == EILSEQ) { 690 if (discard_unconvertible == 1) { 691 int one = 1; 692 iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one); 693 discard_unconvertible = 2; 694 status = 1; 695 } else { 696 if (!silent) { 697 fflush(stdout); 698 if (column > 0) 699 putc('\n',stderr); 700 error(0,0,_("%s:%u:%u: cannot convert"),infilename,line,column); 701 } 702 status = 1; 703 goto done; 704 } 705 } else if (errno == EINVAL) { 706 if (!silent) { 707 fflush(stdout); 708 if (column > 0) 709 putc('\n',stderr); 710 error(0,0,_("%s:%u:%u: incomplete character or shift sequence"),infilename,line,column); 711 } 712 status = 1; 713 goto done; 714 } else if (errno == E2BIG) { 715 if (outptr==outbuf) { 716 /* outbuf is too small. Double its size. */ 717 if (outbuf != initial_outbuf) 718 free(outbuf); 719 outbufsize = 2*outbufsize; 720 if (outbufsize==0) /* integer overflow? */ 721 xalloc_die(); 722 outbuf = (char*)xmalloc(outbufsize); 723 } 724 } else { 725 if (!silent) { 726 int saved_errno = errno; 727 fflush(stdout); 728 if (column > 0) 729 putc('\n',stderr); 730 error(0,saved_errno,_("%s:%u:%u"),infilename,line,column); 731 } 732 status = 1; 733 goto done; 734 } 735 } else 736 break; 737 } 738 if (ferror(infile)) { 739 fflush(stdout); 740 if (column > 0) 741 putc('\n',stderr); 742 error(0,0,_("%s: I/O error"),infilename); 743 status = 1; 744 goto done; 745 } 746 done: 747 if (outbuf != initial_outbuf) 748 free(outbuf); 749 return status; 750} 751 752/* ========================================================================= */ 753 754int main (int argc, char* argv[]) 755{ 756 const char* fromcode = NULL; 757 const char* tocode = NULL; 758 int do_list = 0; 759 iconv_t cd; 760 struct iconv_fallbacks fallbacks; 761 struct iconv_hooks hooks; 762 int i; 763 int status; 764 765 set_program_name (argv[0]); 766#if HAVE_SETLOCALE 767 /* Needed for the locale dependent encodings, "char" and "wchar_t", 768 and for gettext. */ 769 setlocale(LC_CTYPE,""); 770#if ENABLE_NLS 771 /* Needed for gettext. */ 772 setlocale(LC_MESSAGES,""); 773#endif 774#endif 775#if ENABLE_NLS 776 bindtextdomain("libiconv",relocate(LOCALEDIR)); 777 textdomain("libiconv"); 778#endif 779 for (i = 1; i < argc;) { 780 size_t len = strlen(argv[i]); 781 if (!strcmp(argv[i],"--")) { 782 i++; 783 break; 784 } 785 if (!strcmp(argv[i],"-f") 786 /* --f ... --from-code */ 787 || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len)) 788 /* --from-code=... */ 789 || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) { 790 if (len < 12) 791 if (i == argc-1) usage(1); 792 if (fromcode != NULL) usage(1); 793 if (len < 12) { 794 fromcode = argv[i+1]; 795 i += 2; 796 } else { 797 fromcode = argv[i]+12; 798 i++; 799 } 800 continue; 801 } 802 if (!strcmp(argv[i],"-t") 803 /* --t ... --to-code */ 804 || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len)) 805 /* --from-code=... */ 806 || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) { 807 if (len < 10) 808 if (i == argc-1) usage(1); 809 if (tocode != NULL) usage(1); 810 if (len < 10) { 811 tocode = argv[i+1]; 812 i += 2; 813 } else { 814 tocode = argv[i]+10; 815 i++; 816 } 817 continue; 818 } 819 if (!strcmp(argv[i],"-l") 820 /* --l ... --list */ 821 || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) { 822 do_list = 1; 823 i++; 824 continue; 825 } 826 if (/* --by ... --byte-subst */ 827 (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len)) 828 /* --byte-subst=... */ 829 || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) { 830 if (len < 13) { 831 if (i == argc-1) usage(1); 832 ilseq_byte_subst = argv[i+1]; 833 i += 2; 834 } else { 835 ilseq_byte_subst = argv[i]+13; 836 i++; 837 } 838 ilseq_byte_subst_size = 839 check_subst_formatstring(ilseq_byte_subst, "--byte-subst"); 840 continue; 841 } 842 if (/* --w ... --widechar-subst */ 843 (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len)) 844 /* --widechar-subst=... */ 845 || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) { 846 if (len < 17) { 847 if (i == argc-1) usage(1); 848 ilseq_wchar_subst = argv[i+1]; 849 i += 2; 850 } else { 851 ilseq_wchar_subst = argv[i]+17; 852 i++; 853 } 854 ilseq_wchar_subst_size = 855 check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst"); 856 continue; 857 } 858 if (/* --u ... --unicode-subst */ 859 (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len)) 860 /* --unicode-subst=... */ 861 || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) { 862 if (len < 16) { 863 if (i == argc-1) usage(1); 864 ilseq_unicode_subst = argv[i+1]; 865 i += 2; 866 } else { 867 ilseq_unicode_subst = argv[i]+16; 868 i++; 869 } 870 ilseq_unicode_subst_size = 871 check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst"); 872 continue; 873 } 874 if /* --s ... --silent */ 875 (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) { 876 silent = 1; 877 continue; 878 } 879 if /* --h ... --help */ 880 (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) { 881 usage(0); 882 } 883 if /* --v ... --version */ 884 (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) { 885 print_version(); 886 } 887#if O_BINARY 888 /* Backward compatibility with iconv <= 1.9.1. */ 889 if /* --bi ... --binary */ 890 (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) { 891 i++; 892 continue; 893 } 894#endif 895 if (argv[i][0] == '-') { 896 const char *option = argv[i] + 1; 897 if (*option == '\0') 898 usage(1); 899 if (!strcmp(option,"-")) { /* handle -- option delimiter */ 900 i++; 901 break; 902 } 903 for (; *option; option++) 904 switch (*option) { 905 case 'c': discard_unconvertible = 1; break; 906 case 's': silent = 1; break; 907 default: usage(1); 908 } 909 i++; 910 continue; 911 } 912 break; 913 } 914 if (do_list) { 915 if (i != 2 || i != argc) 916 usage(1); 917 iconvlist(print_one,NULL); 918 status = 0; 919 } else { 920#if O_BINARY 921 SET_BINARY(fileno(stdout)); 922#endif 923 if (fromcode == NULL) 924 fromcode = "char"; 925 if (tocode == NULL) 926 tocode = "char"; 927 cd = iconv_open(tocode,fromcode); 928 if (cd == (iconv_t)(-1)) { 929 if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1)) 930 error(0,0,_("conversion from %s unsupported"),fromcode); 931 else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1)) 932 error(0,0,_("conversion to %s unsupported"),tocode); 933 else 934 error(0,0,_("conversion from %s to %s unsupported"),fromcode,tocode); 935 error(EXIT_FAILURE,0,_("try '%s -l' to get the list of supported encodings"),program_name); 936 } 937 /* Look at fromcode and tocode, to determine whether character widths 938 should be determined according to legacy CJK conventions. */ 939 cjkcode = iconv_canonicalize(tocode); 940 if (!is_cjk_encoding(cjkcode)) 941 cjkcode = iconv_canonicalize(fromcode); 942 /* Set up fallback routines for handling impossible conversions. */ 943 if (ilseq_byte_subst != NULL) 944 ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char)); 945 if (!discard_unconvertible) { 946 #if HAVE_WCHAR_T 947 if (ilseq_wchar_subst != NULL) 948 ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char)); 949 #endif 950 if (ilseq_unicode_subst != NULL) 951 ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char)); 952 if (ilseq_byte_subst != NULL) { 953 subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char"); 954 subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int)); 955 #if HAVE_WCHAR_T 956 subst_mb_to_wc_cd = iconv_open("wchar_t","char"); 957 subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t)); 958 #endif 959 subst_mb_to_mb_cd = iconv_open(tocode,"char"); 960 subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4); 961 } 962 #if HAVE_WCHAR_T 963 if (ilseq_wchar_subst != NULL) { 964 subst_wc_to_mb_cd = iconv_open(tocode,"char"); 965 subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4); 966 } 967 #endif 968 if (ilseq_unicode_subst != NULL) { 969 subst_uc_to_mb_cd = iconv_open(tocode,"char"); 970 subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4); 971 } 972 fallbacks.mb_to_uc_fallback = 973 (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL); 974 fallbacks.uc_to_mb_fallback = 975 (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL); 976 fallbacks.mb_to_wc_fallback = 977 (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL); 978 fallbacks.wc_to_mb_fallback = 979 (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL); 980 fallbacks.data = NULL; 981 iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks); 982 } 983 /* Set up hooks for updating the line and column position. */ 984 hooks.uc_hook = update_line_column; 985 hooks.wc_hook = NULL; 986 hooks.data = NULL; 987 iconvctl(cd, ICONV_SET_HOOKS, &hooks); 988 if (i == argc) 989 status = convert(cd,stdin,_("(stdin)")); 990 else { 991 status = 0; 992 for (; i < argc; i++) { 993 const char* infilename = argv[i]; 994 FILE* infile = fopen(infilename,"r"); 995 if (infile == NULL) { 996 int saved_errno = errno; 997 error(0,saved_errno,_("%s"),infilename); 998 status = 1; 999 } else { 1000 status |= convert(cd,infile,infilename); 1001 fclose(infile); 1002 } 1003 } 1004 } 1005 iconv_close(cd); 1006 } 1007 if (ferror(stdout) || fclose(stdout)) { 1008 error(0,0,_("I/O error")); 1009 status = 1; 1010 } 1011 exit(status); 1012} 1013