1/* Copyright (C) 2000-2009, 2011 Free Software Foundation, Inc.
2   This file is part of the GNU LIBICONV Library.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17#include "config.h"
18#ifndef ICONV_CONST
19# define ICONV_CONST
20#endif
21
22#include <limits.h>
23#include <stddef.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27#include <iconv.h>
28#include <errno.h>
29#include <locale.h>
30#include <fcntl.h>
31
32/* Ensure that iconv_no_i18n does not depend on libintl.  */
33#ifdef NO_I18N
34# undef ENABLE_NLS
35# undef ENABLE_RELOCATABLE
36#endif
37
38#include "binary-io.h"
39#include "progname.h"
40#include "relocatable.h"
41#include "safe-read.h"
42#include "xalloc.h"
43#include "uniwidth.h"
44#include "uniwidth/cjk.h"
45
46/* Ensure that iconv_no_i18n does not depend on libintl.  */
47#ifdef NO_I18N
48#include <stdarg.h>
49static void
50error (int status, int errnum, const char *message, ...)
51{
52  va_list args;
53
54  fflush(stdout);
55  fprintf(stderr,"%s: ",program_name);
56  va_start(args,message);
57  vfprintf(stderr,message,args);
58  va_end(args);
59  if (errnum) {
60    const char *s = strerror(errnum);
61    if (s == NULL)
62      s = "Unknown system error";
63  }
64  putc('\n',stderr);
65  fflush(stderr);
66  if (status)
67    exit(status);
68}
69#else
70# include "error.h"
71#endif
72
73#include "gettext.h"
74
75#define _(str) gettext(str)
76
77/* Ensure that iconv_no_i18n does not depend on libintl.  */
78#ifdef NO_I18N
79# define xmalloc malloc
80# define xalloc_die abort
81#endif
82
83/* Locale independent test for a decimal digit.
84   Argument can be  'char' or 'unsigned char'.  (Whereas the argument of
85   <ctype.h> isdigit must be an 'unsigned char'.)  */
86#undef isdigit
87#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
88
/* Locale independent test for a printable character.
   Argument can be  'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isprint must be an 'unsigned char'.)  */
92#define c_isprint(c) ((c) >= ' ' && (c) <= '~')
93
94/* ========================================================================= */
95
96static int discard_unconvertible = 0;
97static int silent = 0;
98
99static void usage (int exitcode)
100{
101  if (exitcode != 0) {
102    const char* helpstring1 =
103      /* TRANSLATORS: The first line of the short usage message.  */
104      _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
105    const char* helpstring2 =
106      /* TRANSLATORS: The second line of the short usage message.
107         Align it correctly against the first line.  */
108      _("or:    iconv -l");
109    fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
110    fprintf(stderr, _("Try `%s --help' for more information.\n"), program_name);
111  } else {
112    /* xgettext: no-wrap */
113    /* TRANSLATORS: The first line of the long usage message.
114       The %s placeholder expands to the program name.  */
115    printf(_("\
116Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
117           program_name);
118    /* xgettext: no-wrap */
119    /* TRANSLATORS: The second line of the long usage message.
120       Align it correctly against the first line.
121       The %s placeholder expands to the program name.  */
122    printf(_("\
123or:    %s -l\n"),
124           program_name);
125    printf("\n");
126    /* xgettext: no-wrap */
127    /* TRANSLATORS: Description of the iconv program.  */
128    printf(_("\
129Converts text from one encoding to another encoding.\n"));
130    printf("\n");
131    /* xgettext: no-wrap */
132    printf(_("\
133Options controlling the input and output format:\n"));
134    /* xgettext: no-wrap */
135    printf(_("\
136  -f ENCODING, --from-code=ENCODING\n\
137                              the encoding of the input\n"));
138    /* xgettext: no-wrap */
139    printf(_("\
140  -t ENCODING, --to-code=ENCODING\n\
141                              the encoding of the output\n"));
142    printf("\n");
143    /* xgettext: no-wrap */
144    printf(_("\
145Options controlling conversion problems:\n"));
146    /* xgettext: no-wrap */
147    printf(_("\
148  -c                          discard unconvertible characters\n"));
149    /* xgettext: no-wrap */
150    printf(_("\
151  --unicode-subst=FORMATSTRING\n\
152                              substitution for unconvertible Unicode characters\n"));
153    /* xgettext: no-wrap */
154    printf(_("\
155  --byte-subst=FORMATSTRING   substitution for unconvertible bytes\n"));
156    /* xgettext: no-wrap */
157    printf(_("\
158  --widechar-subst=FORMATSTRING\n\
159                              substitution for unconvertible wide characters\n"));
160    printf("\n");
161    /* xgettext: no-wrap */
162    printf(_("\
163Options controlling error output:\n"));
164    /* xgettext: no-wrap */
165    printf(_("\
166  -s, --silent                suppress error messages about conversion problems\n"));
167    printf("\n");
168    /* xgettext: no-wrap */
169    printf(_("\
170Informative output:\n"));
171    /* xgettext: no-wrap */
172    printf(_("\
173  -l, --list                  list the supported encodings\n"));
174    /* xgettext: no-wrap */
175    printf(_("\
176  --help                      display this help and exit\n"));
177    /* xgettext: no-wrap */
178    printf(_("\
179  --version                   output version information and exit\n"));
180    printf("\n");
181    /* TRANSLATORS: The placeholder indicates the bug-reporting address
182       for this package.  Please add _another line_ saying
183       "Report translation bugs to <...>\n" with the address for translation
184       bugs (typically your translation team's web or email address).  */
185    fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
186  }
187  exit(exitcode);
188}
189
190static void print_version (void)
191{
192  printf("iconv (GNU libiconv %d.%d)\n",
193         _libiconv_version >> 8, _libiconv_version & 0xff);
194  printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2011");
195  /* xgettext: no-wrap */
196  fputs (_("\
197License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
198This is free software: you are free to change and redistribute it.\n\
199There is NO WARRANTY, to the extent permitted by law.\n\
200"),stdout);
201  /* TRANSLATORS: The %s placeholder expands to an author's name.  */
202  printf(_("Written by %s.\n"),"Bruno Haible");
203  exit(EXIT_SUCCESS);
204}
205
/* Print the NAMESCOUNT strings in NAMES on one line of stdout, separated
   by single spaces and terminated by a newline.
   DATA is ignored.  Always returns 0.  */
static int print_one (unsigned int namescount, const char * const * names,
                      void* data)
{
  (void)data;
  if (namescount > 0) {
    unsigned int k;
    /* First name without a separator, every further name preceded by one. */
    fputs(names[0],stdout);
    for (k = 1; k < namescount; k++) {
      putc(' ',stdout);
      fputs(names[k],stdout);
    }
  }
  putc('\n',stdout);
  return 0;
}
219
220/* ========================================================================= */
221
/* Current position in the input: line number and column. */
static unsigned int line;
static unsigned int column;
/* Encoding name handed to uc_width() when measuring a character. */
static const char* cjkcode;
/* Advance the line/column position past the character UC, which was
   successfully converted.  DATA is not used. */
static void update_line_column (unsigned int uc, void* data)
{
  int width;

  (void)data;
  if (uc == 0x000A) {
    /* Newline: start counting columns afresh on the next line. */
    line++;
    column = 0;
    return;
  }
  width = uc_width(uc, cjkcode);
  if (width >= 0) {
    column += width;
    return;
  }
  /* Negative width: only a tab moves the column, to the next multiple of 8. */
  if (uc == 0x0009)
    column += 8 - (column % 8);
}
241
242/* ========================================================================= */
243
244/* Production of placeholder strings as fallback for unconvertible
245   characters. */
246
/* Check that the argument is a format string taking either no argument
   or exactly one unsigned integer argument. Returns the maximum output
   size of the format string.
   FORMAT is the format string to validate; PARAM_NAME is the name of the
   command-line option it came from, used only in error messages.
   Any violation is reported through error(EXIT_FAILURE,...), i.e. the
   program terminates instead of returning.  */
static size_t check_subst_formatstring (const char *format, const char *param_name)
{
  /* C format strings are described in POSIX (IEEE P1003.1 2001), section
     XSH 3 fprintf().  See also Linux fprintf(3) manual page.
     For simplicity, we don't accept
       - the '%m$' reordering syntax,
       - the 'I' flag,
       - width specifications referring to an argument,
       - precision specifications referring to an argument,
       - size specifiers,
       - format specifiers other than 'o', 'u', 'x', 'X'.
     What remains?
     A directive
       - starts with '%',
       - is optionally followed by any of the characters '#', '0', '-', ' ',
         '+', "'", each of which acts as a flag,
       - is optionally followed by a width specification: a nonempty digit
         sequence,
       - is optionally followed by '.' and a precision specification: a
         nonempty digit sequence,
       - is finished by a specifier
         - '%', that needs no argument,
         - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
   */
  size_t maxsize = 0;
  unsigned int unnumbered_arg_count = 0;

  for (; *format != '\0';) {
    if (*format++ == '%') {
      /* A directive. */
      unsigned int width = 0;
      unsigned int precision = 0;
      unsigned int length;
      /* Parse flags. */
      for (;;) {
        if (*format == ' ' || *format == '+' || *format == '-'
            || *format == '#' || *format == '0' || *format == '\'')
          format++;
        else
          break;
      }
      /* Parse width. */
      if (*format == '*')
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option.  */
              _("%s argument: A format directive with a variable width is not allowed here."),
              param_name);
      if (isdigit (*format)) {
        do {
          unsigned int digit = *format - '0';
          /* Reject values beyond INT_MAX: otherwise the accumulation could
             wrap around and the computed maximum size would be too small
             for the output that the directive really produces. */
          if (width > (INT_MAX - digit) / 10)
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.
                     The %d placeholder expands to the largest accepted width.  */
                  _("%s argument: A format directive with a width larger than %d is not allowed here."),
                  param_name,INT_MAX);
          width = 10*width + digit;
          format++;
        } while (isdigit (*format));
      }
      /* Parse precision. */
      if (*format == '.') {
        format++;
        if (*format == '*')
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: A format directive with a variable precision is not allowed here."),
                param_name);
        if (isdigit (*format)) {
          do {
            unsigned int digit = *format - '0';
            /* Same overflow protection as for the width. */
            if (precision > (INT_MAX - digit) / 10)
              error(EXIT_FAILURE,0,
                    /* TRANSLATORS: An error message.
                       The %s placeholder expands to a command-line option.
                       The %d placeholder expands to the largest accepted precision.  */
                    _("%s argument: A format directive with a precision larger than %d is not allowed here."),
                    param_name,INT_MAX);
            precision = 10*precision + digit;
            format++;
          } while (isdigit (*format));
        }
      }
      /* Parse size. */
      switch (*format) {
        case 'h': case 'l': case 'L': case 'q':
        case 'j': case 'z': case 'Z': case 't':
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: A format directive with a size is not allowed here."),
                param_name);
      }
      /* Parse end of directive. */
      switch (*format) {
        case '%':
          length = 1;
          break;
        case 'u': case 'o': case 'x': case 'X':
          if (*format == 'u') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.30103 /* binary -> decimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length *= 2; /* estimate for FLAG_GROUP */
            length += 1; /* account for leading sign */
          } else if (*format == 'o') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.333334 /* binary -> octal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 1; /* account for leading sign */
          } else { /* 'x', 'X' */
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.25 /* binary -> hexadecimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 2; /* account for leading sign or alternate form */
          }
          unnumbered_arg_count++;
          break;
        default:
          if (*format == '\0')
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: The string ends in the middle of a directive."),
                  param_name);
          else if (c_isprint(*format))
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.
                     The %c placeholder expands to an unknown format directive.  */
                  _("%s argument: The character '%c' is not a valid conversion specifier."),
                  param_name,*format);
          else
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
                  param_name);
          abort(); /*NOTREACHED*/
      }
      format++;
      /* The field is padded to at least WIDTH characters. */
      if (length < width)
        length = width;
      maxsize += length;
    } else
      /* An ordinary character is copied to the output as is. */
      maxsize++;
  }
  if (unnumbered_arg_count > 1)
    error(EXIT_FAILURE,0,
          /* TRANSLATORS: An error message.
             The %s placeholder expands to a command-line option.
             The %u placeholder expands to the number of arguments consumed by the format string.  */
          ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                   "%s argument: The format string consumes more than one argument: %u arguments.",
                   unnumbered_arg_count),
          param_name,unnumbered_arg_count);
  return maxsize;
}
404
/* Substitution state, one group per kind of unconvertible item.  Each
   group holds the format string, the maximum result size of that format
   string, and a buffer of that size plus one. */

/* Byte substitution (--byte-subst). */
static const char* ilseq_byte_subst;
static size_t ilseq_byte_subst_size;
/* Buffer of size ilseq_byte_subst_size+1. */
static char* ilseq_byte_subst_buffer;

/* Wide character substitution (--widechar-subst). */
static const char* ilseq_wchar_subst;
static size_t ilseq_wchar_subst_size;
#if HAVE_WCHAR_T
/* Buffer of size ilseq_wchar_subst_size+1. */
static char* ilseq_wchar_subst_buffer;
#endif

/* Unicode character substitution (--unicode-subst). */
static const char* ilseq_unicode_subst;
static size_t ilseq_unicode_subst_size;
/* Buffer of size ilseq_unicode_subst_size+1. */
static char* ilseq_unicode_subst_buffer;
423
424/* Auxiliary variables for subst_mb_to_uc_fallback. */
425/* Converter from locale encoding to UCS-4. */
426static iconv_t subst_mb_to_uc_cd;
427/* Buffer of size ilseq_byte_subst_size. */
428static unsigned int* subst_mb_to_uc_temp_buffer;
429
430static void subst_mb_to_uc_fallback
431            (const char* inbuf, size_t inbufsize,
432             void (*write_replacement) (const unsigned int *buf, size_t buflen,
433                                        void* callback_arg),
434             void* callback_arg,
435             void* data)
436{
437  for (; inbufsize > 0; inbuf++, inbufsize--) {
438    const char* inptr;
439    size_t inbytesleft;
440    char* outptr;
441    size_t outbytesleft;
442    sprintf(ilseq_byte_subst_buffer,
443            ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
444    inptr = ilseq_byte_subst_buffer;
445    inbytesleft = strlen(ilseq_byte_subst_buffer);
446    outptr = (char*)subst_mb_to_uc_temp_buffer;
447    outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
448    iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
449    if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
450        == (size_t)(-1)
451        || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
452           == (size_t)(-1))
453      error(EXIT_FAILURE,0,
454            /* TRANSLATORS: An error message.
455               The %s placeholder expands to a piece of text, specified through --byte-subst.  */
456            _("cannot convert byte substitution to Unicode: %s"),
457            ilseq_byte_subst_buffer);
458    if (!(outbytesleft%sizeof(unsigned int) == 0))
459      abort();
460    write_replacement(subst_mb_to_uc_temp_buffer,
461                      ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
462                      callback_arg);
463  }
464}
465
466/* Auxiliary variables for subst_uc_to_mb_fallback. */
467/* Converter from locale encoding to target encoding. */
468static iconv_t subst_uc_to_mb_cd;
469/* Buffer of size ilseq_unicode_subst_size*4. */
470static char* subst_uc_to_mb_temp_buffer;
471
472static void subst_uc_to_mb_fallback
473            (unsigned int code,
474             void (*write_replacement) (const char *buf, size_t buflen,
475                                        void* callback_arg),
476             void* callback_arg,
477             void* data)
478{
479  const char* inptr;
480  size_t inbytesleft;
481  char* outptr;
482  size_t outbytesleft;
483  sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
484  inptr = ilseq_unicode_subst_buffer;
485  inbytesleft = strlen(ilseq_unicode_subst_buffer);
486  outptr = subst_uc_to_mb_temp_buffer;
487  outbytesleft = ilseq_unicode_subst_size*4;
488  iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
489  if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
490      == (size_t)(-1)
491      || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
492         == (size_t)(-1))
493    error(EXIT_FAILURE,0,
494          /* TRANSLATORS: An error message.
495             The %s placeholder expands to a piece of text, specified through --unicode-subst.  */
496          _("cannot convert unicode substitution to target encoding: %s"),
497          ilseq_unicode_subst_buffer);
498  write_replacement(subst_uc_to_mb_temp_buffer,
499                    ilseq_unicode_subst_size*4-outbytesleft,
500                    callback_arg);
501}
502
503#if HAVE_WCHAR_T
504
505/* Auxiliary variables for subst_mb_to_wc_fallback. */
506/* Converter from locale encoding to wchar_t. */
507static iconv_t subst_mb_to_wc_cd;
508/* Buffer of size ilseq_byte_subst_size. */
509static wchar_t* subst_mb_to_wc_temp_buffer;
510
511static void subst_mb_to_wc_fallback
512            (const char* inbuf, size_t inbufsize,
513             void (*write_replacement) (const wchar_t *buf, size_t buflen,
514                                        void* callback_arg),
515             void* callback_arg,
516             void* data)
517{
518  for (; inbufsize > 0; inbuf++, inbufsize--) {
519    const char* inptr;
520    size_t inbytesleft;
521    char* outptr;
522    size_t outbytesleft;
523    sprintf(ilseq_byte_subst_buffer,
524            ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
525    inptr = ilseq_byte_subst_buffer;
526    inbytesleft = strlen(ilseq_byte_subst_buffer);
527    outptr = (char*)subst_mb_to_wc_temp_buffer;
528    outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
529    iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
530    if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
531        == (size_t)(-1)
532        || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
533           == (size_t)(-1))
534      error(EXIT_FAILURE,0,
535            /* TRANSLATORS: An error message.
536               The %s placeholder expands to a piece of text, specified through --byte-subst.  */
537            _("cannot convert byte substitution to wide string: %s"),
538            ilseq_byte_subst_buffer);
539    if (!(outbytesleft%sizeof(wchar_t) == 0))
540      abort();
541    write_replacement(subst_mb_to_wc_temp_buffer,
542                      ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
543                      callback_arg);
544  }
545}
546
547/* Auxiliary variables for subst_wc_to_mb_fallback. */
548/* Converter from locale encoding to target encoding. */
549static iconv_t subst_wc_to_mb_cd;
550/* Buffer of size ilseq_wchar_subst_size*4.
551   Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
552static char* subst_wc_to_mb_temp_buffer;
553
554static void subst_wc_to_mb_fallback
555            (wchar_t code,
556             void (*write_replacement) (const char *buf, size_t buflen,
557                                        void* callback_arg),
558             void* callback_arg,
559             void* data)
560{
561  const char* inptr;
562  size_t inbytesleft;
563  char* outptr;
564  size_t outbytesleft;
565  sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
566  inptr = ilseq_wchar_subst_buffer;
567  inbytesleft = strlen(ilseq_wchar_subst_buffer);
568  outptr = subst_wc_to_mb_temp_buffer;
569  outbytesleft = ilseq_wchar_subst_size*4;
570  iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
571  if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
572      == (size_t)(-1)
573      || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
574         == (size_t)(-1))
575    error(EXIT_FAILURE,0,
576          /* TRANSLATORS: An error message.
577             The %s placeholder expands to a piece of text, specified through --widechar-subst.  */
578          _("cannot convert widechar substitution to target encoding: %s"),
579          ilseq_wchar_subst_buffer);
580  write_replacement(subst_wc_to_mb_temp_buffer,
581                    ilseq_wchar_subst_size*4-outbytesleft,
582                    callback_arg);
583}
584
585#else
586
587#define subst_mb_to_wc_fallback NULL
588#define subst_wc_to_mb_fallback NULL
589
590#endif
591
592/* Auxiliary variables for subst_mb_to_mb_fallback. */
593/* Converter from locale encoding to target encoding. */
594static iconv_t subst_mb_to_mb_cd;
595/* Buffer of size ilseq_byte_subst_size*4. */
596static char* subst_mb_to_mb_temp_buffer;
597
598static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
599{
600  for (; inbufsize > 0; inbuf++, inbufsize--) {
601    const char* inptr;
602    size_t inbytesleft;
603    char* outptr;
604    size_t outbytesleft;
605    sprintf(ilseq_byte_subst_buffer,
606            ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
607    inptr = ilseq_byte_subst_buffer;
608    inbytesleft = strlen(ilseq_byte_subst_buffer);
609    outptr = subst_mb_to_mb_temp_buffer;
610    outbytesleft = ilseq_byte_subst_size*4;
611    iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
612    if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
613        == (size_t)(-1)
614        || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
615           == (size_t)(-1))
616      error(EXIT_FAILURE,0,
617            /* TRANSLATORS: An error message.
618               The %s placeholder expands to a piece of text, specified through --byte-subst.  */
619            _("cannot convert byte substitution to target encoding: %s"),
620            ilseq_byte_subst_buffer);
621    fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
622           stdout);
623  }
624}
625
626/* ========================================================================= */
627
628/* Error messages during conversion.  */
629
630static void conversion_error_EILSEQ (const char* infilename)
631{
632  fflush(stdout);
633  if (column > 0)
634    putc('\n',stderr);
635  error(0,0,
636        /* TRANSLATORS: An error message.
637           The placeholders expand to the input file name, a line number, and a column number.  */
638        _("%s:%u:%u: cannot convert"),
639        infilename,line,column);
640}
641
642static void conversion_error_EINVAL (const char* infilename)
643{
644  fflush(stdout);
645  if (column > 0)
646    putc('\n',stderr);
647  error(0,0,
648        /* TRANSLATORS: An error message.
649           The placeholders expand to the input file name, a line number, and a column number.
650           A "shift sequence" is a sequence of bytes that changes the state of the converter;
651           this concept exists only for "stateful" encodings like ISO-2022-JP.  */
652        _("%s:%u:%u: incomplete character or shift sequence"),
653        infilename,line,column);
654}
655
656static void conversion_error_other (int errnum, const char* infilename)
657{
658  fflush(stdout);
659  if (column > 0)
660    putc('\n',stderr);
661  error(0,errnum,
662        /* TRANSLATORS: The first part of an error message.
663           It is followed by a colon and a detail message.
664           The placeholders expand to the input file name, a line number, and a column number.  */
665        _("%s:%u:%u"),
666        infilename,line,column);
667}
668
669/* Convert the input given in infile.  */
670
671static int convert (iconv_t cd, int infile, const char* infilename)
672{
673  char inbuf[4096+4096];
674  size_t inbufrest = 0;
675  int infile_error = 0;
676  char initial_outbuf[4096];
677  char *outbuf = initial_outbuf;
678  size_t outbufsize = sizeof(initial_outbuf);
679  int status = 0;
680
681#if O_BINARY
682  SET_BINARY(infile);
683#endif
684  line = 1; column = 0;
685  iconv(cd,NULL,NULL,NULL,NULL);
686  for (;;) {
687    size_t inbufsize;
688    /* Transfer the accumulated output to its destination, in case the
689       safe_read() call will block. */
690    fflush(stdout);
691    inbufsize = safe_read(infile,inbuf+4096,4096);
692    if (inbufsize == 0 || inbufsize == SAFE_READ_ERROR) {
693      infile_error = (inbufsize == SAFE_READ_ERROR ? errno : 0);
694      if (inbufrest == 0)
695        break;
696      else {
697        if (ilseq_byte_subst != NULL)
698          subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
699        if (!silent)
700          conversion_error_EINVAL(infilename);
701        status = 1;
702        goto done;
703      }
704    } else {
705      const char* inptr = inbuf+4096-inbufrest;
706      size_t insize = inbufrest+inbufsize;
707      inbufrest = 0;
708      while (insize > 0) {
709        char* outptr = outbuf;
710        size_t outsize = outbufsize;
711        size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
712        if (outptr != outbuf) {
713          int saved_errno = errno;
714          if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
715            status = 1;
716            goto done;
717          }
718          errno = saved_errno;
719        }
720        if (res == (size_t)(-1)) {
721          if (errno == EILSEQ) {
722            if (discard_unconvertible == 1) {
723              int one = 1;
724              iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
725              discard_unconvertible = 2;
726              status = 1;
727            } else {
728              if (!silent)
729                conversion_error_EILSEQ(infilename);
730              status = 1;
731              goto done;
732            }
733          } else if (errno == EINVAL) {
734            if (inbufsize == 0 || insize > 4096) {
735              if (!silent)
736                conversion_error_EINVAL(infilename);
737              status = 1;
738              goto done;
739            } else {
740              inbufrest = insize;
741              if (insize > 0) {
742                /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
743                   we cannot use memcpy here, because source and destination
744                   regions may overlap. */
745                char* restptr = inbuf+4096-insize;
746                do { *restptr++ = *inptr++; } while (--insize > 0);
747              }
748              break;
749            }
750          } else if (errno == E2BIG) {
751            if (outptr==outbuf) {
752              /* outbuf is too small. Double its size. */
753              if (outbuf != initial_outbuf)
754                free(outbuf);
755              outbufsize = 2*outbufsize;
756              if (outbufsize==0) /* integer overflow? */
757                xalloc_die();
758              outbuf = (char*)xmalloc(outbufsize);
759            }
760          } else {
761            if (!silent)
762              conversion_error_other(errno,infilename);
763            status = 1;
764            goto done;
765          }
766        }
767      }
768    }
769  }
770  for (;;) {
771    char* outptr = outbuf;
772    size_t outsize = outbufsize;
773    size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
774    if (outptr != outbuf) {
775      int saved_errno = errno;
776      if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
777        status = 1;
778        goto done;
779      }
780      errno = saved_errno;
781    }
782    if (res == (size_t)(-1)) {
783      if (errno == EILSEQ) {
784        if (discard_unconvertible == 1) {
785          int one = 1;
786          iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
787          discard_unconvertible = 2;
788          status = 1;
789        } else {
790          if (!silent)
791            conversion_error_EILSEQ(infilename);
792          status = 1;
793          goto done;
794        }
795      } else if (errno == EINVAL) {
796        if (!silent)
797          conversion_error_EINVAL(infilename);
798        status = 1;
799        goto done;
800      } else if (errno == E2BIG) {
801        if (outptr==outbuf) {
802          /* outbuf is too small. Double its size. */
803          if (outbuf != initial_outbuf)
804            free(outbuf);
805          outbufsize = 2*outbufsize;
806          if (outbufsize==0) /* integer overflow? */
807            xalloc_die();
808          outbuf = (char*)xmalloc(outbufsize);
809        }
810      } else {
811        if (!silent)
812          conversion_error_other(errno,infilename);
813        status = 1;
814        goto done;
815      }
816    } else
817      break;
818  }
819  if (infile_error) {
820    fflush(stdout);
821    if (column > 0)
822      putc('\n',stderr);
823    error(0,infile_error,
824          /* TRANSLATORS: An error message.
825             The placeholder expands to the input file name.  */
826          _("%s: I/O error"),
827          infilename);
828    status = 1;
829    goto done;
830  }
831 done:
832  if (outbuf != initial_outbuf)
833    free(outbuf);
834  return status;
835}
836
837/* ========================================================================= */
838
839int main (int argc, char* argv[])
840{
841  const char* fromcode = NULL;
842  const char* tocode = NULL;
843  int do_list = 0;
844  iconv_t cd;
845  struct iconv_fallbacks fallbacks;
846  struct iconv_hooks hooks;
847  int i;
848  int status;
849
850  set_program_name (argv[0]);
851#if HAVE_SETLOCALE
852  /* Needed for the locale dependent encodings, "char" and "wchar_t",
853     and for gettext. */
854  setlocale(LC_CTYPE,"");
855#if ENABLE_NLS
856  /* Needed for gettext. */
857  setlocale(LC_MESSAGES,"");
858#endif
859#endif
860#if ENABLE_NLS
861  bindtextdomain("libiconv",relocate(LOCALEDIR));
862#endif
863  textdomain("libiconv");
864  for (i = 1; i < argc;) {
865    size_t len = strlen(argv[i]);
866    if (!strcmp(argv[i],"--")) {
867      i++;
868      break;
869    }
870    if (!strcmp(argv[i],"-f")
871        /* --f ... --from-code */
872        || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
873        /* --from-code=... */
874        || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
875      if (len < 12)
876        if (i == argc-1) usage(1);
877      if (fromcode != NULL) usage(1);
878      if (len < 12) {
879        fromcode = argv[i+1];
880        i += 2;
881      } else {
882        fromcode = argv[i]+12;
883        i++;
884      }
885      continue;
886    }
887    if (!strcmp(argv[i],"-t")
888        /* --t ... --to-code */
889        || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
890        /* --from-code=... */
891        || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
892      if (len < 10)
893        if (i == argc-1) usage(1);
894      if (tocode != NULL) usage(1);
895      if (len < 10) {
896        tocode = argv[i+1];
897        i += 2;
898      } else {
899        tocode = argv[i]+10;
900        i++;
901      }
902      continue;
903    }
904    if (!strcmp(argv[i],"-l")
905        /* --l ... --list */
906        || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
907      do_list = 1;
908      i++;
909      continue;
910    }
911    if (/* --by ... --byte-subst */
912        (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
913        /* --byte-subst=... */
914        || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
915      if (len < 13) {
916        if (i == argc-1) usage(1);
917        ilseq_byte_subst = argv[i+1];
918        i += 2;
919      } else {
920        ilseq_byte_subst = argv[i]+13;
921        i++;
922      }
923      ilseq_byte_subst_size =
924        check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
925      continue;
926    }
927    if (/* --w ... --widechar-subst */
928        (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
929        /* --widechar-subst=... */
930        || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
931      if (len < 17) {
932        if (i == argc-1) usage(1);
933        ilseq_wchar_subst = argv[i+1];
934        i += 2;
935      } else {
936        ilseq_wchar_subst = argv[i]+17;
937        i++;
938      }
939      ilseq_wchar_subst_size =
940        check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
941      continue;
942    }
943    if (/* --u ... --unicode-subst */
944        (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
945        /* --unicode-subst=... */
946        || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
947      if (len < 16) {
948        if (i == argc-1) usage(1);
949        ilseq_unicode_subst = argv[i+1];
950        i += 2;
951      } else {
952        ilseq_unicode_subst = argv[i]+16;
953        i++;
954      }
955      ilseq_unicode_subst_size =
956        check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
957      continue;
958    }
959    if /* --s ... --silent */
960       (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
961      silent = 1;
962      continue;
963    }
964    if /* --h ... --help */
965       (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
966      usage(0);
967    }
968    if /* --v ... --version */
969       (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
970      print_version();
971    }
972#if O_BINARY
973    /* Backward compatibility with iconv <= 1.9.1. */
974    if /* --bi ... --binary */
975       (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
976      i++;
977      continue;
978    }
979#endif
980    if (argv[i][0] == '-') {
981      const char *option = argv[i] + 1;
982      if (*option == '\0')
983        usage(1);
984      for (; *option; option++)
985        switch (*option) {
986          case 'c': discard_unconvertible = 1; break;
987          case 's': silent = 1; break;
988          default: usage(1);
989        }
990      i++;
991      continue;
992    }
993    break;
994  }
995  if (do_list) {
996    if (i != 2 || i != argc)
997      usage(1);
998    iconvlist(print_one,NULL);
999    status = 0;
1000  } else {
1001#if O_BINARY
1002    SET_BINARY(fileno(stdout));
1003#endif
1004    if (fromcode == NULL)
1005      fromcode = "char";
1006    if (tocode == NULL)
1007      tocode = "char";
1008    cd = iconv_open(tocode,fromcode);
1009    if (cd == (iconv_t)(-1)) {
1010      if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
1011        error(0,0,
1012              /* TRANSLATORS: An error message.
1013                 The placeholder expands to the encoding name, specified through --from-code.  */
1014              _("conversion from %s unsupported"),
1015              fromcode);
1016      else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
1017        error(0,0,
1018              /* TRANSLATORS: An error message.
1019                 The placeholder expands to the encoding name, specified through --to-code.  */
1020              _("conversion to %s unsupported"),
1021              tocode);
1022      else
1023        error(0,0,
1024              /* TRANSLATORS: An error message.
1025                 The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively.  */
1026              _("conversion from %s to %s unsupported"),
1027              fromcode,tocode);
1028      error(EXIT_FAILURE,0,
1029            /* TRANSLATORS: Additional advice after an error message.
1030               The %s placeholder expands to the program name.  */
1031            _("try '%s -l' to get the list of supported encodings"),
1032            program_name);
1033    }
1034    /* Look at fromcode and tocode, to determine whether character widths
1035       should be determined according to legacy CJK conventions. */
1036    cjkcode = iconv_canonicalize(tocode);
1037    if (!is_cjk_encoding(cjkcode))
1038      cjkcode = iconv_canonicalize(fromcode);
1039    /* Set up fallback routines for handling impossible conversions. */
1040    if (ilseq_byte_subst != NULL)
1041      ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
1042    if (!discard_unconvertible) {
1043      #if HAVE_WCHAR_T
1044      if (ilseq_wchar_subst != NULL)
1045        ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
1046      #endif
1047      if (ilseq_unicode_subst != NULL)
1048        ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
1049      if (ilseq_byte_subst != NULL) {
1050        subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
1051        subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
1052        #if HAVE_WCHAR_T
1053        subst_mb_to_wc_cd = iconv_open("wchar_t","char");
1054        subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
1055        #endif
1056        subst_mb_to_mb_cd = iconv_open(tocode,"char");
1057        subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
1058      }
1059      #if HAVE_WCHAR_T
1060      if (ilseq_wchar_subst != NULL) {
1061        subst_wc_to_mb_cd = iconv_open(tocode,"char");
1062        subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
1063      }
1064      #endif
1065      if (ilseq_unicode_subst != NULL) {
1066        subst_uc_to_mb_cd = iconv_open(tocode,"char");
1067        subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
1068      }
1069      fallbacks.mb_to_uc_fallback =
1070        (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
1071      fallbacks.uc_to_mb_fallback =
1072        (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
1073      fallbacks.mb_to_wc_fallback =
1074        (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
1075      fallbacks.wc_to_mb_fallback =
1076        (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
1077      fallbacks.data = NULL;
1078      iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
1079    }
1080    /* Set up hooks for updating the line and column position. */
1081    hooks.uc_hook = update_line_column;
1082    hooks.wc_hook = NULL;
1083    hooks.data = NULL;
1084    iconvctl(cd, ICONV_SET_HOOKS, &hooks);
1085    if (i == argc)
1086      status = convert(cd,fileno(stdin),
1087                       /* TRANSLATORS: A filename substitute denoting standard input.  */
1088                       _("(stdin)"));
1089    else {
1090      status = 0;
1091      for (; i < argc; i++) {
1092        const char* infilename = argv[i];
1093        FILE* infile = fopen(infilename,"r");
1094        if (infile == NULL) {
1095          int saved_errno = errno;
1096          error(0,saved_errno,
1097                /* TRANSLATORS: The first part of an error message.
1098                   It is followed by a colon and a detail message.
1099                   The %s placeholder expands to the input file name.  */
1100                _("%s"),
1101                infilename);
1102          status = 1;
1103        } else {
1104          status |= convert(cd,fileno(infile),infilename);
1105          fclose(infile);
1106        }
1107      }
1108    }
1109    iconv_close(cd);
1110  }
1111  if (ferror(stdout) || fclose(stdout)) {
1112    error(0,0,
1113          /* TRANSLATORS: An error message.  */
1114          _("I/O error"));
1115    status = 1;
1116  }
1117  exit(status);
1118}
1119