1/* Copyright (C) 2000-2006 Free Software Foundation, Inc.
2   This file is part of the GNU LIBICONV Library.
3
4   The GNU LIBICONV Library is free software; you can redistribute it
5   and/or modify it under the terms of the GNU Library General Public
6   License as published by the Free Software Foundation; either version 2
7   of the License, or (at your option) any later version.
8
9   The GNU LIBICONV Library is distributed in the hope that it will be
10   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Library General Public License for more details.
13
14   You should have received a copy of the GNU Library General Public
15   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16   If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
17   Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#include "config.h"
20#ifndef ICONV_CONST
21# define ICONV_CONST const
22#endif
23
24#include <limits.h>
25#include <stddef.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <iconv.h>
30#include <errno.h>
31#if HAVE_LOCALE_H
32#include <locale.h>
33#endif
34#include <fcntl.h>
35
36/* Ensure that iconv_no_i18n does not depend on libintl.  */
37#ifdef NO_I18N
38# undef ENABLE_NLS
39# undef ENABLE_RELOCATABLE
40#endif
41
42#include "binary-io.h"
43#include "exit.h"
44#include "progname.h"
45#include "relocatable.h"
46#include "xalloc.h"
47#include "uniwidth.h"
48#include "cjk.h"
49
50/* Ensure that iconv_no_i18n does not depend on libintl.  */
51#ifdef NO_I18N
52#include <stdarg.h>
53static void
54error (int status, int errnum, const char *message, ...)
55{
56  va_list args;
57
58  fflush(stdout);
59  fprintf(stderr,"%s: ",program_name);
60  va_start(args,message);
61  vfprintf(stderr,message,args);
62  va_end(args);
63  if (errnum) {
64    const char *s = strerror(errnum);
65    if (s == NULL)
66      s = "Unknown system error";
67  }
68  putc('\n',stderr);
69  fflush(stderr);
70  if (status)
71    exit(status);
72}
73#else
74# include "error.h"
75#endif
76
77#include "gettext.h"
78
79#define _(str) gettext(str)
80
81/* Ensure that iconv_no_i18n does not depend on libintl.  */
82#ifdef NO_I18N
83# define xmalloc malloc
84# define xalloc_die abort
85#endif
86
87/* Locale independent test for a decimal digit.
88   Argument can be  'char' or 'unsigned char'.  (Whereas the argument of
89   <ctype.h> isdigit must be an 'unsigned char'.)  */
90#undef isdigit
91#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
92
/* Locale independent test for a printable character.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isprint must be an 'unsigned char'.)  */
96#define c_isprint(c) ((c) >= ' ' && (c) <= '~')
97
98/* ========================================================================= */
99
100static int discard_unconvertible = 0;
101static int silent = 0;
102
103static void usage (int exitcode)
104{
105  if (exitcode != 0) {
106    const char* helpstring1 =
107      _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
108    const char* helpstring2 =
109      _("or:    iconv -l");
110    fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
111    fprintf(stderr, _("Try `%s --help' for more information.\n"), program_name);
112  } else {
113    /* xgettext: no-wrap */
114    printf(_("\
115Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
116           program_name);
117    /* xgettext: no-wrap */
118    printf(_("\
119or:    %s -l\n"),
120           program_name);
121    printf("\n");
122    /* xgettext: no-wrap */
123    printf(_("\
124Converts text from one encoding to another encoding.\n"));
125    printf("\n");
126    /* xgettext: no-wrap */
127    printf(_("\
128Options controlling the input and output format:\n"));
129    /* xgettext: no-wrap */
130    printf(_("\
131  -f ENCODING, --from-code=ENCODING\n\
132                              the encoding of the input\n"));
133    /* xgettext: no-wrap */
134    printf(_("\
135  -t ENCODING, --to-code=ENCODING\n\
136                              the encoding of the output\n"));
137    printf("\n");
138    /* xgettext: no-wrap */
139    printf(_("\
140Options controlling conversion problems:\n"));
141    /* xgettext: no-wrap */
142    printf(_("\
143  -c                          discard unconvertible characters\n"));
144    /* xgettext: no-wrap */
145    printf(_("\
146  --unicode-subst=FORMATSTRING\n\
147                              substitution for unconvertible Unicode characters\n"));
148    /* xgettext: no-wrap */
149    printf(_("\
150  --byte-subst=FORMATSTRING   substitution for unconvertible bytes\n"));
151    /* xgettext: no-wrap */
152    printf(_("\
153  --widechar-subst=FORMATSTRING\n\
154                              substitution for unconvertible wide characters\n"));
155    printf("\n");
156    /* xgettext: no-wrap */
157    printf(_("\
158Options controlling error output:\n"));
159    /* xgettext: no-wrap */
160    printf(_("\
161  -s, --silent                suppress error messages about conversion problems\n"));
162    printf("\n");
163    /* xgettext: no-wrap */
164    printf(_("\
165Informative output:\n"));
166    /* xgettext: no-wrap */
167    printf(_("\
168  -l, --list                  list the supported encodings\n"));
169    /* xgettext: no-wrap */
170    printf(_("\
171  --help                      display this help and exit\n"));
172    /* xgettext: no-wrap */
173    printf(_("\
174  --version                   output version information and exit\n"));
175    printf("\n");
176    fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
177  }
178  exit(exitcode);
179}
180
181static void print_version (void)
182{
183  printf("iconv (GNU libiconv %d.%d)\n",
184         _libiconv_version >> 8, _libiconv_version & 0xff);
185  printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2006");
186  printf(_("\
187This is free software; see the source for copying conditions.  There is NO\n\
188warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
189  printf(_("Written by %s.\n"),"Bruno Haible");
190  exit(EXIT_SUCCESS);
191}
192
/* Callback passed to iconvlist(): writes the NAMESCOUNT strings in NAMES to
   stdout on a single line, separated by single spaces and terminated by a
   newline.  DATA is ignored.  Always returns 0.  */
static int print_one (unsigned int namescount, const char * const * names,
                      void* data)
{
  const char* separator = "";
  unsigned int k = 0;

  (void)data;
  while (k < namescount) {
    /* The separator is empty before the first name only. */
    fputs(separator,stdout);
    fputs(names[k],stdout);
    separator = " ";
    k++;
  }
  putc('\n',stdout);
  return 0;
}
206
207/* ========================================================================= */
208
/* Current position in the input: line number and column. */
static unsigned int line;
static unsigned int column;
/* Encoding name handed to uc_width() when measuring a character. */
static const char* cjkcode;
/* Advance the line/column position past the successfully converted
   character UC.  DATA is not used. */
static void update_line_column (unsigned int uc, void* data)
{
  int char_width;

  if (uc == 0x000A) {
    /* Line feed: start a new line. */
    line++;
    column = 0;
    return;
  }
  char_width = uc_width(uc, cjkcode);
  if (char_width >= 0) {
    column += char_width;
    return;
  }
  /* Negative width: only a horizontal tab moves the column, namely to the
     next multiple of 8. */
  if (uc == 0x0009)
    column += 8 - (column % 8);
}
228
229/* ========================================================================= */
230
231/* Production of placeholder strings as fallback for unconvertible
232   characters. */
233
/* Parse the nonempty decimal digit sequence starting at *FORMATP (a width
   or precision specification) and advance *FORMATP past it.
   PARAM_NAME is the option name used in error messages.
   Exits with an error if the value would exceed INT_MAX: such a value
   would otherwise wrap around silently and make the size estimate computed
   by check_subst_formatstring too small for the formatted output.  */
static unsigned int parse_subst_number (const char **formatp, const char *param_name)
{
  const char *p = *formatp;
  unsigned int value = 0;

  do {
    unsigned int digit = (unsigned int) (*p - '0');
    /* Equivalent to 10*value + digit > INT_MAX, without overflowing. */
    if (value > ((unsigned int) INT_MAX - digit) / 10)
      error(EXIT_FAILURE,0,_("%s argument: The width or precision of a format directive is too large."),param_name);
    value = 10*value + digit;
    p++;
  } while (isdigit (*p));
  *formatp = p;
  return value;
}

/* Check that the argument is a format string taking either no argument
   or exactly one unsigned integer argument. Returns the maximum output
   size of the format string.
   FORMAT is the format string to check; PARAM_NAME is the name of the
   command line option it came from, used in error messages.
   Any violation terminates the program through error(EXIT_FAILURE,...). */
static size_t check_subst_formatstring (const char *format, const char *param_name)
{
  /* C format strings are described in POSIX (IEEE P1003.1 2001), section
     XSH 3 fprintf().  See also Linux fprintf(3) manual page.
     For simplicity, we don't accept
       - the '%m$' reordering syntax,
       - the 'I' flag,
       - width specifications referring to an argument,
       - precision specifications referring to an argument,
       - size specifiers,
       - format specifiers other than 'o', 'u', 'x', 'X'.
     What remains?
     A directive
       - starts with '%',
       - is optionally followed by any of the characters '#', '0', '-', ' ',
         '+', "'", each of which acts as a flag,
       - is optionally followed by a width specification: a nonempty digit
         sequence,
       - is optionally followed by '.' and a precision specification: a
         nonempty digit sequence,
       - is finished by a specifier
         - '%', that needs no argument,
         - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
   */
  size_t maxsize = 0;
  unsigned int unnumbered_arg_count = 0;

  for (; *format != '\0';) {
    if (*format++ == '%') {
      /* A directive. */
      unsigned int width = 0;
      unsigned int precision = 0;
      unsigned int length;
      /* Parse flags. */
      for (;;) {
        if (*format == ' ' || *format == '+' || *format == '-'
            || *format == '#' || *format == '0' || *format == '\'')
          format++;
        else
          break;
      }
      /* Parse width. */
      if (*format == '*')
        error(EXIT_FAILURE,0,_("%s argument: A format directive with a variable width is not allowed here."),param_name);
      if (isdigit (*format))
        width = parse_subst_number(&format,param_name);
      /* Parse precision. */
      if (*format == '.') {
        format++;
        if (*format == '*')
          error(EXIT_FAILURE,0,_("%s argument: A format directive with a variable precision is not allowed here."),param_name);
        if (isdigit (*format))
          precision = parse_subst_number(&format,param_name);
      }
      /* Parse size. */
      switch (*format) {
        case 'h': case 'l': case 'L': case 'q':
        case 'j': case 'z': case 'Z': case 't':
          error(EXIT_FAILURE,0,_("%s argument: A format directive with a size is not allowed here."),param_name);
      }
      /* Parse end of directive. */
      switch (*format) {
        case '%':
          length = 1;
          break;
        case 'u': case 'o': case 'x': case 'X':
          if (*format == 'u') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.30103 /* binary -> decimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length *= 2; /* estimate for FLAG_GROUP */
            length += 1; /* account for leading sign */
          } else if (*format == 'o') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.333334 /* binary -> octal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 1; /* account for leading sign */
          } else { /* 'x', 'X' */
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.25 /* binary -> hexadecimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 2; /* account for leading sign or alternate form */
          }
          unnumbered_arg_count++;
          break;
        default:
          if (*format == '\0')
            error(EXIT_FAILURE,0,_("%s argument: The string ends in the middle of a directive."),param_name);
          else if (c_isprint(*format))
            error(EXIT_FAILURE,0,_("%s argument: The character '%c' is not a valid conversion specifier."),param_name,*format);
          else
            error(EXIT_FAILURE,0,_("%s argument: The character that terminates the format directive is not a valid conversion specifier."),param_name);
          abort(); /*NOTREACHED*/
      }
      format++;
      /* The width is a minimum for the size of the directive's output. */
      if (length < width)
        length = width;
      maxsize += length;
    } else
      /* An ordinary character is copied as is. */
      maxsize++;
  }
  if (unnumbered_arg_count > 1)
    error(EXIT_FAILURE,0,ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                                  "%s argument: The format string consumes more than one argument: %u arguments.",
                                  unnumbered_arg_count),
                         param_name,unnumbered_arg_count);
  return maxsize;
}
362
363/* Format strings. */
364static const char* ilseq_byte_subst;
365static const char* ilseq_wchar_subst;
366static const char* ilseq_unicode_subst;
367
368/* Maximum result size for each format string. */
369static size_t ilseq_byte_subst_size;
370static size_t ilseq_wchar_subst_size;
371static size_t ilseq_unicode_subst_size;
372
373/* Buffer of size ilseq_byte_subst_size+1. */
374static char* ilseq_byte_subst_buffer;
375#if HAVE_WCHAR_T
376/* Buffer of size ilseq_wchar_subst_size+1. */
377static char* ilseq_wchar_subst_buffer;
378#endif
379/* Buffer of size ilseq_unicode_subst_size+1. */
380static char* ilseq_unicode_subst_buffer;
381
382/* Auxiliary variables for subst_mb_to_uc_fallback. */
383/* Converter from locale encoding to UCS-4. */
384static iconv_t subst_mb_to_uc_cd;
385/* Buffer of size ilseq_byte_subst_size. */
386static unsigned int* subst_mb_to_uc_temp_buffer;
387
388static void subst_mb_to_uc_fallback
389            (const char* inbuf, size_t inbufsize,
390             void (*write_replacement) (const unsigned int *buf, size_t buflen,
391                                        void* callback_arg),
392             void* callback_arg,
393             void* data)
394{
395  for (; inbufsize > 0; inbuf++, inbufsize--) {
396    const char* inptr;
397    size_t inbytesleft;
398    char* outptr;
399    size_t outbytesleft;
400    sprintf(ilseq_byte_subst_buffer,
401            ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
402    inptr = ilseq_byte_subst_buffer;
403    inbytesleft = strlen(ilseq_byte_subst_buffer);
404    outptr = (char*)subst_mb_to_uc_temp_buffer;
405    outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
406    iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
407    if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
408        == (size_t)(-1)
409        || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
410           == (size_t)(-1))
411      error(EXIT_FAILURE,0,_("cannot convert byte substitution to Unicode: %s"),ilseq_byte_subst_buffer);
412    if (!(outbytesleft%sizeof(unsigned int) == 0))
413      abort();
414    write_replacement(subst_mb_to_uc_temp_buffer,
415                      ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
416                      callback_arg);
417  }
418}
419
420/* Auxiliary variables for subst_uc_to_mb_fallback. */
421/* Converter from locale encoding to target encoding. */
422static iconv_t subst_uc_to_mb_cd;
423/* Buffer of size ilseq_unicode_subst_size*4. */
424static char* subst_uc_to_mb_temp_buffer;
425
426static void subst_uc_to_mb_fallback
427            (unsigned int code,
428             void (*write_replacement) (const char *buf, size_t buflen,
429                                        void* callback_arg),
430             void* callback_arg,
431             void* data)
432{
433  const char* inptr;
434  size_t inbytesleft;
435  char* outptr;
436  size_t outbytesleft;
437  sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
438  inptr = ilseq_unicode_subst_buffer;
439  inbytesleft = strlen(ilseq_unicode_subst_buffer);
440  outptr = subst_uc_to_mb_temp_buffer;
441  outbytesleft = ilseq_unicode_subst_size*4;
442  iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
443  if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
444      == (size_t)(-1)
445      || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
446         == (size_t)(-1))
447    error(EXIT_FAILURE,0,_("cannot convert unicode substitution to target encoding: %s"),ilseq_unicode_subst_buffer);
448  write_replacement(subst_uc_to_mb_temp_buffer,
449                    ilseq_unicode_subst_size*4-outbytesleft,
450                    callback_arg);
451}
452
453#if HAVE_WCHAR_T
454
455/* Auxiliary variables for subst_mb_to_wc_fallback. */
456/* Converter from locale encoding to wchar_t. */
457static iconv_t subst_mb_to_wc_cd;
458/* Buffer of size ilseq_byte_subst_size. */
459static wchar_t* subst_mb_to_wc_temp_buffer;
460
461static void subst_mb_to_wc_fallback
462            (const char* inbuf, size_t inbufsize,
463             void (*write_replacement) (const wchar_t *buf, size_t buflen,
464                                        void* callback_arg),
465             void* callback_arg,
466             void* data)
467{
468  for (; inbufsize > 0; inbuf++, inbufsize--) {
469    const char* inptr;
470    size_t inbytesleft;
471    char* outptr;
472    size_t outbytesleft;
473    sprintf(ilseq_byte_subst_buffer,
474            ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
475    inptr = ilseq_byte_subst_buffer;
476    inbytesleft = strlen(ilseq_byte_subst_buffer);
477    outptr = (char*)subst_mb_to_wc_temp_buffer;
478    outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
479    iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
480    if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
481        == (size_t)(-1)
482        || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
483           == (size_t)(-1))
484      error(EXIT_FAILURE,0,_("cannot convert byte substitution to wide string: %s"),ilseq_byte_subst_buffer);
485    if (!(outbytesleft%sizeof(wchar_t) == 0))
486      abort();
487    write_replacement(subst_mb_to_wc_temp_buffer,
488                      ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
489                      callback_arg);
490  }
491}
492
493/* Auxiliary variables for subst_wc_to_mb_fallback. */
494/* Converter from locale encoding to target encoding. */
495static iconv_t subst_wc_to_mb_cd;
496/* Buffer of size ilseq_wchar_subst_size*4.
497   Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
498static char* subst_wc_to_mb_temp_buffer;
499
500static void subst_wc_to_mb_fallback
501            (wchar_t code,
502             void (*write_replacement) (const char *buf, size_t buflen,
503                                        void* callback_arg),
504             void* callback_arg,
505             void* data)
506{
507  const char* inptr;
508  size_t inbytesleft;
509  char* outptr;
510  size_t outbytesleft;
511  sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
512  inptr = ilseq_wchar_subst_buffer;
513  inbytesleft = strlen(ilseq_wchar_subst_buffer);
514  outptr = subst_wc_to_mb_temp_buffer;
515  outbytesleft = ilseq_wchar_subst_size*4;
516  iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
517  if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
518      == (size_t)(-1)
519      || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
520         == (size_t)(-1))
521    error(EXIT_FAILURE,0,_("cannot convert widechar substitution to target encoding: %s"),ilseq_wchar_subst_buffer);
522  write_replacement(subst_wc_to_mb_temp_buffer,
523                    ilseq_wchar_subst_size*4-outbytesleft,
524                    callback_arg);
525}
526
527#else
528
529#define subst_mb_to_wc_fallback NULL
530#define subst_wc_to_mb_fallback NULL
531
532#endif
533
534/* Auxiliary variables for subst_mb_to_mb_fallback. */
535/* Converter from locale encoding to target encoding. */
536static iconv_t subst_mb_to_mb_cd;
537/* Buffer of size ilseq_byte_subst_size*4. */
538static char* subst_mb_to_mb_temp_buffer;
539
540static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
541{
542  for (; inbufsize > 0; inbuf++, inbufsize--) {
543    const char* inptr;
544    size_t inbytesleft;
545    char* outptr;
546    size_t outbytesleft;
547    sprintf(ilseq_byte_subst_buffer,
548            ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
549    inptr = ilseq_byte_subst_buffer;
550    inbytesleft = strlen(ilseq_byte_subst_buffer);
551    outptr = subst_mb_to_mb_temp_buffer;
552    outbytesleft = ilseq_byte_subst_size*4;
553    iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
554    if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
555        == (size_t)(-1)
556        || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
557           == (size_t)(-1))
558      error(EXIT_FAILURE,0,_("cannot convert byte substitution to target encoding: %s"),ilseq_byte_subst_buffer);
559    fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
560           stdout);
561  }
562}
563
564/* ========================================================================= */
565
566static int convert (iconv_t cd, FILE* infile, const char* infilename)
567{
568  char inbuf[4096+4096];
569  size_t inbufrest = 0;
570  char initial_outbuf[4096];
571  char *outbuf = initial_outbuf;
572  size_t outbufsize = sizeof(initial_outbuf);
573  int status = 0;
574
575#if O_BINARY
576  SET_BINARY(fileno(infile));
577#endif
578  line = 1; column = 0;
579  iconv(cd,NULL,NULL,NULL,NULL);
580  for (;;) {
581    size_t inbufsize = fread(inbuf+4096,1,4096,infile);
582    if (inbufsize == 0) {
583      if (inbufrest == 0)
584        break;
585      else {
586        if (ilseq_byte_subst != NULL)
587          subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
588        if (!silent) {
589          fflush(stdout);
590          if (column > 0)
591            putc('\n',stderr);
592          error(0,0,_("%s:%u:%u: incomplete character or shift sequence"),infilename,line,column);
593        }
594        status = 1;
595        goto done;
596      }
597    } else {
598      const char* inptr = inbuf+4096-inbufrest;
599      size_t insize = inbufrest+inbufsize;
600      inbufrest = 0;
601      while (insize > 0) {
602        char* outptr = outbuf;
603        size_t outsize = outbufsize;
604        size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
605        if (outptr != outbuf) {
606          int saved_errno = errno;
607          if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
608            status = 1;
609            goto done;
610          }
611          errno = saved_errno;
612        }
613        if (res == (size_t)(-1)) {
614          if (errno == EILSEQ) {
615            if (discard_unconvertible == 1) {
616              int one = 1;
617              iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
618              discard_unconvertible = 2;
619              status = 1;
620            } else {
621              if (!silent) {
622                fflush(stdout);
623                if (column > 0)
624                  putc('\n',stderr);
625                error(0,0,_("%s:%u:%u: cannot convert"),infilename,line,column);
626              }
627              status = 1;
628              goto done;
629            }
630          } else if (errno == EINVAL) {
631            if (inbufsize == 0 || insize > 4096) {
632              if (!silent) {
633                fflush(stdout);
634                if (column > 0)
635                  putc('\n',stderr);
636                error(0,0,_("%s:%u:%u: incomplete character or shift sequence"),infilename,line,column);
637              }
638              status = 1;
639              goto done;
640            } else {
641              inbufrest = insize;
642              if (insize > 0) {
643                /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
644                   we cannot use memcpy here, because source and destination
645                   regions may overlap. */
646                char* restptr = inbuf+4096-insize;
647                do { *restptr++ = *inptr++; } while (--insize > 0);
648              }
649              break;
650            }
651          } else if (errno == E2BIG) {
652            if (outptr==outbuf) {
653              /* outbuf is too small. Double its size. */
654              if (outbuf != initial_outbuf)
655                free(outbuf);
656              outbufsize = 2*outbufsize;
657              if (outbufsize==0) /* integer overflow? */
658                xalloc_die();
659              outbuf = (char*)xmalloc(outbufsize);
660            }
661          } else {
662            if (!silent) {
663              int saved_errno = errno;
664              fflush(stdout);
665              if (column > 0)
666                putc('\n',stderr);
667              error(0,saved_errno,_("%s:%u:%u"),infilename,line,column);
668            }
669            status = 1;
670            goto done;
671          }
672        }
673      }
674    }
675  }
676  for (;;) {
677    char* outptr = outbuf;
678    size_t outsize = outbufsize;
679    size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
680    if (outptr != outbuf) {
681      int saved_errno = errno;
682      if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
683        status = 1;
684        goto done;
685      }
686      errno = saved_errno;
687    }
688    if (res == (size_t)(-1)) {
689      if (errno == EILSEQ) {
690        if (discard_unconvertible == 1) {
691          int one = 1;
692          iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
693          discard_unconvertible = 2;
694          status = 1;
695        } else {
696          if (!silent) {
697            fflush(stdout);
698            if (column > 0)
699              putc('\n',stderr);
700            error(0,0,_("%s:%u:%u: cannot convert"),infilename,line,column);
701          }
702          status = 1;
703          goto done;
704        }
705      } else if (errno == EINVAL) {
706        if (!silent) {
707          fflush(stdout);
708          if (column > 0)
709            putc('\n',stderr);
710          error(0,0,_("%s:%u:%u: incomplete character or shift sequence"),infilename,line,column);
711        }
712        status = 1;
713        goto done;
714      } else if (errno == E2BIG) {
715        if (outptr==outbuf) {
716          /* outbuf is too small. Double its size. */
717          if (outbuf != initial_outbuf)
718            free(outbuf);
719          outbufsize = 2*outbufsize;
720          if (outbufsize==0) /* integer overflow? */
721            xalloc_die();
722          outbuf = (char*)xmalloc(outbufsize);
723        }
724      } else {
725        if (!silent) {
726          int saved_errno = errno;
727          fflush(stdout);
728          if (column > 0)
729            putc('\n',stderr);
730          error(0,saved_errno,_("%s:%u:%u"),infilename,line,column);
731        }
732        status = 1;
733        goto done;
734      }
735    } else
736      break;
737  }
738  if (ferror(infile)) {
739    fflush(stdout);
740    if (column > 0)
741      putc('\n',stderr);
742    error(0,0,_("%s: I/O error"),infilename);
743    status = 1;
744    goto done;
745  }
746 done:
747  if (outbuf != initial_outbuf)
748    free(outbuf);
749  return status;
750}
751
752/* ========================================================================= */
753
754int main (int argc, char* argv[])
755{
756  const char* fromcode = NULL;
757  const char* tocode = NULL;
758  int do_list = 0;
759  iconv_t cd;
760  struct iconv_fallbacks fallbacks;
761  struct iconv_hooks hooks;
762  int i;
763  int status;
764
765  set_program_name (argv[0]);
766#if HAVE_SETLOCALE
767  /* Needed for the locale dependent encodings, "char" and "wchar_t",
768     and for gettext. */
769  setlocale(LC_CTYPE,"");
770#if ENABLE_NLS
771  /* Needed for gettext. */
772  setlocale(LC_MESSAGES,"");
773#endif
774#endif
775#if ENABLE_NLS
776  bindtextdomain("libiconv",relocate(LOCALEDIR));
777  textdomain("libiconv");
778#endif
779  for (i = 1; i < argc;) {
780    size_t len = strlen(argv[i]);
781    if (!strcmp(argv[i],"--")) {
782      i++;
783      break;
784    }
785    if (!strcmp(argv[i],"-f")
786        /* --f ... --from-code */
787        || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
788        /* --from-code=... */
789        || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
790      if (len < 12)
791        if (i == argc-1) usage(1);
792      if (fromcode != NULL) usage(1);
793      if (len < 12) {
794        fromcode = argv[i+1];
795        i += 2;
796      } else {
797        fromcode = argv[i]+12;
798        i++;
799      }
800      continue;
801    }
802    if (!strcmp(argv[i],"-t")
803        /* --t ... --to-code */
804        || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
805        /* --from-code=... */
806        || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
807      if (len < 10)
808        if (i == argc-1) usage(1);
809      if (tocode != NULL) usage(1);
810      if (len < 10) {
811        tocode = argv[i+1];
812        i += 2;
813      } else {
814        tocode = argv[i]+10;
815        i++;
816      }
817      continue;
818    }
819    if (!strcmp(argv[i],"-l")
820        /* --l ... --list */
821        || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
822      do_list = 1;
823      i++;
824      continue;
825    }
826    if (/* --by ... --byte-subst */
827        (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
828        /* --byte-subst=... */
829        || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
830      if (len < 13) {
831        if (i == argc-1) usage(1);
832        ilseq_byte_subst = argv[i+1];
833        i += 2;
834      } else {
835        ilseq_byte_subst = argv[i]+13;
836        i++;
837      }
838      ilseq_byte_subst_size =
839        check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
840      continue;
841    }
842    if (/* --w ... --widechar-subst */
843        (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
844        /* --widechar-subst=... */
845        || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
846      if (len < 17) {
847        if (i == argc-1) usage(1);
848        ilseq_wchar_subst = argv[i+1];
849        i += 2;
850      } else {
851        ilseq_wchar_subst = argv[i]+17;
852        i++;
853      }
854      ilseq_wchar_subst_size =
855        check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
856      continue;
857    }
858    if (/* --u ... --unicode-subst */
859        (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
860        /* --unicode-subst=... */
861        || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
862      if (len < 16) {
863        if (i == argc-1) usage(1);
864        ilseq_unicode_subst = argv[i+1];
865        i += 2;
866      } else {
867        ilseq_unicode_subst = argv[i]+16;
868        i++;
869      }
870      ilseq_unicode_subst_size =
871        check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
872      continue;
873    }
874    if /* --s ... --silent */
875       (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
876      silent = 1;
877      continue;
878    }
879    if /* --h ... --help */
880       (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
881      usage(0);
882    }
883    if /* --v ... --version */
884       (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
885      print_version();
886    }
887#if O_BINARY
888    /* Backward compatibility with iconv <= 1.9.1. */
889    if /* --bi ... --binary */
890       (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
891      i++;
892      continue;
893    }
894#endif
895    if (argv[i][0] == '-') {
896      const char *option = argv[i] + 1;
897      if (*option == '\0')
898        usage(1);
899      if (!strcmp(option,"-")) { /* handle -- option delimiter */
900	i++;
901	break;
902      }
903      for (; *option; option++)
904        switch (*option) {
905          case 'c': discard_unconvertible = 1; break;
906          case 's': silent = 1; break;
907          default: usage(1);
908        }
909      i++;
910      continue;
911    }
912    break;
913  }
914  if (do_list) {
915    if (i != 2 || i != argc)
916      usage(1);
917    iconvlist(print_one,NULL);
918    status = 0;
919  } else {
920#if O_BINARY
921    SET_BINARY(fileno(stdout));
922#endif
923    if (fromcode == NULL)
924      fromcode = "char";
925    if (tocode == NULL)
926      tocode = "char";
927    cd = iconv_open(tocode,fromcode);
928    if (cd == (iconv_t)(-1)) {
929      if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
930        error(0,0,_("conversion from %s unsupported"),fromcode);
931      else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
932        error(0,0,_("conversion to %s unsupported"),tocode);
933      else
934        error(0,0,_("conversion from %s to %s unsupported"),fromcode,tocode);
935      error(EXIT_FAILURE,0,_("try '%s -l' to get the list of supported encodings"),program_name);
936    }
937    /* Look at fromcode and tocode, to determine whether character widths
938       should be determined according to legacy CJK conventions. */
939    cjkcode = iconv_canonicalize(tocode);
940    if (!is_cjk_encoding(cjkcode))
941      cjkcode = iconv_canonicalize(fromcode);
942    /* Set up fallback routines for handling impossible conversions. */
943    if (ilseq_byte_subst != NULL)
944      ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
945    if (!discard_unconvertible) {
946      #if HAVE_WCHAR_T
947      if (ilseq_wchar_subst != NULL)
948        ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
949      #endif
950      if (ilseq_unicode_subst != NULL)
951        ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
952      if (ilseq_byte_subst != NULL) {
953        subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
954        subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
955        #if HAVE_WCHAR_T
956        subst_mb_to_wc_cd = iconv_open("wchar_t","char");
957        subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
958        #endif
959        subst_mb_to_mb_cd = iconv_open(tocode,"char");
960        subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
961      }
962      #if HAVE_WCHAR_T
963      if (ilseq_wchar_subst != NULL) {
964        subst_wc_to_mb_cd = iconv_open(tocode,"char");
965        subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
966      }
967      #endif
968      if (ilseq_unicode_subst != NULL) {
969        subst_uc_to_mb_cd = iconv_open(tocode,"char");
970        subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
971      }
972      fallbacks.mb_to_uc_fallback =
973        (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
974      fallbacks.uc_to_mb_fallback =
975        (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
976      fallbacks.mb_to_wc_fallback =
977        (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
978      fallbacks.wc_to_mb_fallback =
979        (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
980      fallbacks.data = NULL;
981      iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
982    }
983    /* Set up hooks for updating the line and column position. */
984    hooks.uc_hook = update_line_column;
985    hooks.wc_hook = NULL;
986    hooks.data = NULL;
987    iconvctl(cd, ICONV_SET_HOOKS, &hooks);
988    if (i == argc)
989      status = convert(cd,stdin,_("(stdin)"));
990    else {
991      status = 0;
992      for (; i < argc; i++) {
993        const char* infilename = argv[i];
994        FILE* infile = fopen(infilename,"r");
995        if (infile == NULL) {
996          int saved_errno = errno;
997          error(0,saved_errno,_("%s"),infilename);
998          status = 1;
999        } else {
1000          status |= convert(cd,infile,infilename);
1001          fclose(infile);
1002        }
1003      }
1004    }
1005    iconv_close(cd);
1006  }
1007  if (ferror(stdout) || fclose(stdout)) {
1008    error(0,0,_("I/O error"));
1009    status = 1;
1010  }
1011  exit(status);
1012}
1013