1/* Copyright (C) 2000-2005 Free Software Foundation, Inc.
2   This file is part of the GNU LIBICONV Library.
3
4   The GNU LIBICONV Library is free software; you can redistribute it
5   and/or modify it under the terms of the GNU Library General Public
6   License as published by the Free Software Foundation; either version 2
7   of the License, or (at your option) any later version.
8
9   The GNU LIBICONV Library is distributed in the hope that it will be
10   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Library General Public License for more details.
13
14   You should have received a copy of the GNU Library General Public
15   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16   If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
17   Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#include "config.h"
20#ifndef ICONV_CONST
21# define ICONV_CONST const
22#endif
23
24#include <stddef.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <iconv.h>
29#include <errno.h>
30#if HAVE_LOCALE_H
31#include <locale.h>
32#endif
33#include <fcntl.h>
34
35/* Ensure that iconv_no_i18n does not depend on libintl.  */
36#ifdef NO_I18N
37# undef ENABLE_NLS
38# undef ENABLE_RELOCATABLE
39#endif
40
41#include "binary-io.h"
42#include "progname.h"
43#include "relocatable.h"
44#include "uniwidth.h"
45#include "cjk.h"
46#include "gettext.h"
47
48#define _(str) gettext(str)
49
/* Set to 1 by the -c option.  convert() changes it to 2 once it has
   switched the conversion descriptor to ICONV_SET_DISCARD_ILSEQ. */
static int discard_unconvertible = 0;
/* Set to 1 by the -s option; when set, convert() does not print its
   conversion diagnostics. */
static int silent = 0;
52
/* Print the two-line synopsis and terminate the program with EXITCODE.
   The text goes to stdout when EXITCODE is 0 and to stderr otherwise.
   This function does not return. */
static void usage (int exitcode)
{
  FILE* stream = (exitcode != 0 ? stderr : stdout);
  const char* convert_synopsis =
    _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
  const char* list_synopsis =
    _("or:    iconv -l");

  fprintf(stream, "%s\n%s\n", convert_synopsis, list_synopsis);
  exit(exitcode);
}
62
63static void print_version (void)
64{
65  printf("iconv (GNU libiconv %d.%d)\n",
66         _libiconv_version >> 8, _libiconv_version & 0xff);
67  printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2002");
68  printf(_("\
69This is free software; see the source for copying conditions.  There is NO\n\
70warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
71  printf(_("Written by %s.\n"),"Bruno Haible");
72  exit(0);
73}
74
/* Callback passed to iconvlist(): print the NAMESCOUNT entries of NAMES
   on one line of stdout, separated by single spaces and terminated by a
   newline.  DATA is unused.  Always returns 0. */
static int print_one (unsigned int namescount, const char * const * names,
                      void* data)
{
  unsigned int idx = 0;

  (void)data;
  while (idx < namescount) {
    /* A separator goes in front of every name except the first. */
    if (idx != 0)
      fputc(' ', stdout);
    fputs(names[idx], stdout);
    idx++;
  }
  fputc('\n', stdout);
  return 0;
}
88
/* Position within the text converted so far.  convert() resets these to
   line 1, column 0 before it starts on a file; they are reported in its
   diagnostics. */
static unsigned int line;
static unsigned int column;
/* Encoding name handed to uc_width(); main() sets it from the
   canonicalized tocode or fromcode. */
static const char* cjkcode;

/* Hook installed through ICONV_SET_HOOKS: advance the line/column
   position for the character UC.  DATA is unused. */
static void update_line_column (unsigned int uc, void* data)
{
  int width;

  (void)data;

  /* A line feed starts a new line. */
  if (uc == 0x000A) {
    line++;
    column = 0;
    return;
  }

  width = uc_width(uc, cjkcode);
  if (width >= 0) {
    column += width;
    return;
  }

  /* uc_width() gave a negative width: only a horizontal tab moves the
     column, up to the next multiple of 8. */
  if (uc == 0x0009)
    column += 8 - (column % 8);
}
108
109static int convert (iconv_t cd, FILE* infile, const char* infilename)
110{
111  char inbuf[4096+4096];
112  size_t inbufrest = 0;
113  char outbuf[4096];
114  int status = 0;
115
116#if O_BINARY
117  SET_BINARY(fileno(infile));
118#endif
119  line = 1; column = 0;
120  iconv(cd,NULL,NULL,NULL,NULL);
121  for (;;) {
122    size_t inbufsize = fread(inbuf+4096,1,4096,infile);
123    if (inbufsize == 0) {
124      if (inbufrest == 0)
125        break;
126      else {
127        if (!silent) {
128          fflush(stdout);
129          if (column > 0)
130            putc('\n',stderr);
131          fprintf(stderr,_("iconv: %s:%u:%u: incomplete character or shift sequence\n"),infilename,line,column);
132        }
133        return 1;
134      }
135    } else {
136      const char* inptr = inbuf+4096-inbufrest;
137      size_t insize = inbufrest+inbufsize;
138      inbufrest = 0;
139      while (insize > 0) {
140        char* outptr = outbuf;
141        size_t outsize = sizeof(outbuf);
142        size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
143        if (outptr != outbuf) {
144          int saved_errno = errno;
145          if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf)
146            return 1;
147          errno = saved_errno;
148        }
149        if (res == (size_t)(-1)) {
150          if (errno == EILSEQ) {
151            if (discard_unconvertible == 1) {
152              int one = 1;
153              iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
154              discard_unconvertible = 2;
155              status = 1;
156            } else {
157              if (!silent) {
158                fflush(stdout);
159                if (column > 0)
160                  putc('\n',stderr);
161                fprintf(stderr,_("iconv: %s:%u:%u: cannot convert\n"),infilename,line,column);
162              }
163              return 1;
164            }
165          } else if (errno == EINVAL) {
166            if (inbufsize == 0 || insize > 4096) {
167              if (!silent) {
168                fflush(stdout);
169                if (column > 0)
170                  putc('\n',stderr);
171                fprintf(stderr,_("iconv: %s:%u:%u: incomplete character or shift sequence\n"),infilename,line,column);
172              }
173              return 1;
174            } else {
175              inbufrest = insize;
176              if (insize > 0) {
177                /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
178                   we cannot use memcpy here, because source and destination
179                   regions may overlap. */
180                char* restptr = inbuf+4096-insize;
181                do { *restptr++ = *inptr++; } while (--insize > 0);
182              }
183              break;
184            }
185          } else if (errno != E2BIG) {
186            if (!silent) {
187              int saved_errno = errno;
188              fflush(stdout);
189              if (column > 0)
190                putc('\n',stderr);
191              fprintf(stderr,_("iconv: %s:%u:%u: "),infilename,line,column);
192              errno = saved_errno;
193              perror("");
194            }
195            return 1;
196          }
197        }
198      }
199    }
200  }
201  {
202    char* outptr = outbuf;
203    size_t outsize = sizeof(outbuf);
204    size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
205    if (outptr != outbuf) {
206      int saved_errno = errno;
207      if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf)
208        return 1;
209      errno = saved_errno;
210    }
211    if (res == (size_t)(-1)) {
212      if (errno == EILSEQ) {
213        if (discard_unconvertible == 1) {
214          int one = 1;
215          iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
216          discard_unconvertible = 2;
217          status = 1;
218        } else {
219          if (!silent) {
220            fflush(stdout);
221            if (column > 0)
222              putc('\n',stderr);
223            fprintf(stderr,_("iconv: %s:%u:%u: cannot convert\n"),infilename,line,column);
224          }
225          return 1;
226        }
227      } else if (errno == EINVAL) {
228        if (!silent) {
229          fflush(stdout);
230          if (column > 0)
231            putc('\n',stderr);
232          fprintf(stderr,_("iconv: %s:%u:%u: incomplete character or shift sequence\n"),infilename,line,column);
233        }
234        return 1;
235      } else {
236        if (!silent) {
237          int saved_errno = errno;
238          fflush(stdout);
239          if (column > 0)
240            putc('\n',stderr);
241          fprintf(stderr,_("iconv: %s:%u:%u: "),infilename,line,column);
242          errno = saved_errno;
243          perror("");
244        }
245        return 1;
246      }
247    }
248  }
249  if (ferror(infile)) {
250    fflush(stdout);
251    if (column > 0)
252      putc('\n',stderr);
253    fprintf(stderr,_("iconv: %s: I/O error\n"),infilename);
254    return 1;
255  }
256  return status;
257}
258
259int main (int argc, char* argv[])
260{
261  const char* fromcode = NULL;
262  const char* tocode = NULL;
263  int do_list = 0;
264  iconv_t cd;
265  struct iconv_hooks hooks;
266  int i;
267  int status;
268
269  set_program_name (argv[0]);
270#if HAVE_SETLOCALE
271  /* Needed for the locale dependent encodings, "char" and "wchar_t",
272     and for gettext. */
273  setlocale(LC_CTYPE,"");
274#if ENABLE_NLS
275  /* Needed for gettext. */
276  setlocale(LC_MESSAGES,"");
277#endif
278#endif
279#if ENABLE_NLS
280  bindtextdomain("libiconv",relocate(LOCALEDIR));
281#endif
282  textdomain("libiconv");
283  for (i = 1; i < argc;) {
284    if (!strcmp(argv[i],"--")) {
285      i++;
286      break;
287    }
288    if (!strcmp(argv[i],"-f")) {
289      if (i == argc-1) usage(1);
290      if (fromcode != NULL) usage(1);
291      fromcode = argv[i+1];
292      i += 2;
293      continue;
294    }
295    if (!strcmp(argv[i],"-t")) {
296      if (i == argc-1) usage(1);
297      if (tocode != NULL) usage(1);
298      tocode = argv[i+1];
299      i += 2;
300      continue;
301    }
302    if (!strcmp(argv[i],"-l")) {
303      do_list = 1;
304      i++;
305      continue;
306    }
307    if (!strcmp(argv[i],"--help")) {
308      usage(0);
309    }
310    if (!strcmp(argv[i],"--version")) {
311      print_version();
312    }
313#if O_BINARY
314    /* Backward compatibility with iconv <= 1.9.1. */
315    if (!strcmp(argv[i],"--binary")) {
316      i++;
317      continue;
318    }
319#endif
320    if (argv[i][0] == '-') {
321      const char *option = argv[i] + 1;
322      if (*option == '\0')
323        usage(1);
324      for (; *option; option++)
325        switch (*option) {
326          case 'c': discard_unconvertible = 1; break;
327          case 's': silent = 1; break;
328          default: usage(1);
329        }
330      i++;
331      continue;
332    }
333    break;
334  }
335  if (do_list) {
336    if (i != 2 || i != argc)
337      usage(1);
338    iconvlist(print_one,NULL);
339    status = 0;
340  } else {
341#if O_BINARY
342    SET_BINARY(fileno(stdout));
343#endif
344    if (fromcode == NULL)
345      fromcode = "char";
346    if (tocode == NULL)
347      tocode = "char";
348    cd = iconv_open(tocode,fromcode);
349    if (cd == (iconv_t)(-1)) {
350      if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
351        fprintf(stderr,_("iconv: conversion from %s unsupported\n"),fromcode);
352      else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
353        fprintf(stderr,_("iconv: conversion to %s unsupported\n"),tocode);
354      else
355        fprintf(stderr,_("iconv: conversion from %s to %s unsupported\n"),fromcode,tocode);
356      fprintf(stderr,_("iconv: try '%s -l' to get the list of supported encodings\n"),program_name);
357      exit(1);
358    }
359    /* Look at fromcode and tocode, to determine whether character widths
360       should be determined according to legacy CJK conventions. */
361    cjkcode = iconv_canonicalize(tocode);
362    if (!is_cjk_encoding(cjkcode))
363      cjkcode = iconv_canonicalize(fromcode);
364    /* Set up hooks for updating the line and column position. */
365    hooks.uc_hook = update_line_column;
366    hooks.wc_hook = NULL;
367    hooks.data = NULL;
368    iconvctl(cd, ICONV_SET_HOOKS, &hooks);
369    if (i == argc)
370      status = convert(cd,stdin,_("(stdin)"));
371    else {
372      status = 0;
373      for (; i < argc; i++) {
374        const char* infilename = argv[i];
375        FILE* infile = fopen(infilename,"r");
376        if (infile == NULL) {
377          int saved_errno = errno;
378          fprintf(stderr,_("iconv: %s: "),infilename);
379          errno = saved_errno;
380          perror("");
381          status = 1;
382        } else {
383          status |= convert(cd,infile,infilename);
384          fclose(infile);
385        }
386      }
387    }
388    iconv_close(cd);
389  }
390  if (ferror(stdout) || fclose(stdout)) {
391    fprintf(stderr,_("iconv: I/O error\n"));
392    status = 1;
393  }
394  exit(status);
395}
396