1/* Recode Serbian text from Cyrillic to Latin script. 2 Copyright (C) 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2006. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include "config.h" 20#endif 21 22#include <errno.h> 23#include <getopt.h> 24#include <stdbool.h> 25#include <stdio.h> 26#include <stdlib.h> 27#include <locale.h> 28 29#if HAVE_ICONV 30#include <iconv.h> 31#endif 32 33#include "closeout.h" 34#include "error.h" 35#include "progname.h" 36#include "relocatable.h" 37#include "basename.h" 38#include "xalloc.h" 39#include "localcharset.h" 40#include "c-strcase.h" 41#include "xstriconv.h" 42#include "filters.h" 43#include "propername.h" 44#include "gettext.h" 45 46#define _(str) gettext (str) 47 48 49/* Long options. */ 50static const struct option long_options[] = 51{ 52 { "help", no_argument, NULL, 'h' }, 53 { "version", no_argument, NULL, 'V' }, 54 { NULL, 0, NULL, 0 } 55}; 56 57/* Forward declaration of local functions. */ 58static void usage (int status) 59#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) 60 __attribute__ ((noreturn)) 61#endif 62; 63static void process (FILE *stream); 64 65int 66main (int argc, char *argv[]) 67{ 68 /* Default values for command line options. */ 69 bool do_help = false; 70 bool do_version = false; 71 72 int opt; 73 74 /* Set program name for message texts. */ 75 set_program_name (argv[0]); 76 77#ifdef HAVE_SETLOCALE 78 /* Set locale via LC_ALL. */ 79 setlocale (LC_ALL, ""); 80#endif 81 82 /* Set the text message domain. */ 83 bindtextdomain (PACKAGE, relocate (LOCALEDIR)); 84 textdomain (PACKAGE); 85 86 /* Ensure that write errors on stdout are detected. */ 87 atexit (close_stdout); 88 89 /* Parse command line options. */ 90 while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF) 91 switch (opt) 92 { 93 case '\0': /* Long option. */ 94 break; 95 case 'h': 96 do_help = true; 97 break; 98 case 'V': 99 do_version = true; 100 break; 101 default: 102 usage (EXIT_FAILURE); 103 } 104 105 /* Version information is requested. */ 106 if (do_version) 107 { 108 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); 109 /* xgettext: no-wrap */ 110 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ 111License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\ 112This is free software: you are free to change and redistribute it.\n\ 113There is NO WARRANTY, to the extent permitted by law.\n\ 114"), 115 "2006-2007"); 116 printf (_("Written by %s and %s.\n"), 117 /* TRANSLATORS: This is a proper name. The last name is 118 (with Unicode escapes) "\u0160egan" or (with HTML entities) 119 "Šegan". */ 120 proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"), 121 proper_name ("Bruno Haible")); 122 exit (EXIT_SUCCESS); 123 } 124 125 /* Help is requested. */ 126 if (do_help) 127 usage (EXIT_SUCCESS); 128 129 if (argc - optind > 0) 130 error (EXIT_FAILURE, 0, _("too many arguments")); 131 132 process (stdin); 133 134 exit (EXIT_SUCCESS); 135} 136 137 138/* Display usage information and exit. */ 139static void 140usage (int status) 141{ 142 if (status != EXIT_SUCCESS) 143 fprintf (stderr, _("Try `%s --help' for more information.\n"), 144 program_name); 145 else 146 { 147 /* xgettext: no-wrap */ 148 printf (_("\ 149Usage: %s [OPTION]\n\ 150"), program_name); 151 printf ("\n"); 152 /* xgettext: no-wrap */ 153 printf (_("\ 154Recode Serbian text from Cyrillic to Latin script.\n")); 155 /* xgettext: no-wrap */ 156 printf (_("\ 157The input text is read from standard input. The converted text is output to\n\ 158standard output.\n")); 159 printf ("\n"); 160 /* xgettext: no-wrap */ 161 printf (_("\ 162Informative output:\n")); 163 /* xgettext: no-wrap */ 164 printf (_("\ 165 -h, --help display this help and exit\n")); 166 /* xgettext: no-wrap */ 167 printf (_("\ 168 -V, --version output version information and exit\n")); 169 printf ("\n"); 170 /* TRANSLATORS: The placeholder indicates the bug-reporting address 171 for this package. Please add _another line_ saying 172 "Report translation bugs to <...>\n" with the address for translation 173 bugs (typically your translation team's web or email address). */ 174 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout); 175 } 176 177 exit (status); 178} 179 180 181/* Routines for reading a line. 182 Don't use routines that drop NUL bytes. Don't use getline(), because it 183 doesn't provide a good error message in case of memory allocation failure. 184 The gnulib module 'linebuffer' is nearly the right thing, except that we 185 don't want an extra newline at the end of file. */ 186 187/* A 'struct linebuffer' holds a line of text. */ 188 189struct linebuffer 190{ 191 size_t size; /* Allocated. */ 192 size_t length; /* Used. */ 193 char *buffer; 194}; 195 196/* Initialize linebuffer LINEBUFFER for use. */ 197static inline void 198init_linebuffer (struct linebuffer *lb) 199{ 200 lb->size = 0; 201 lb->length = 0; 202 lb->buffer = NULL; 203} 204 205/* Read an arbitrarily long line of text from STREAM into linebuffer LB. 206 Keep the newline. Do not NUL terminate. 207 Return LINEBUFFER, except at end of file return NULL. */ 208static struct linebuffer * 209read_linebuffer (struct linebuffer *lb, FILE *stream) 210{ 211 if (feof (stream)) 212 return NULL; 213 else 214 { 215 char *p = lb->buffer; 216 char *end = lb->buffer + lb->size; 217 218 for (;;) 219 { 220 int c = getc (stream); 221 if (c == EOF) 222 { 223 if (p == lb->buffer || ferror (stream)) 224 return NULL; 225 break; 226 } 227 if (p == end) 228 { 229 size_t oldsize = lb->size; /* = p - lb->buffer */ 230 size_t newsize = 2 * oldsize + 40; 231 lb->buffer = (char *) xrealloc (lb->buffer, newsize); 232 lb->size = newsize; 233 p = lb->buffer + oldsize; 234 end = lb->buffer + newsize; 235 } 236 *p++ = c; 237 if (c == '\n') 238 break; 239 } 240 241 lb->length = p - lb->buffer; 242 return lb; 243 } 244} 245 246/* Free linebuffer LB and its data, all allocated with malloc. */ 247static inline void 248destroy_linebuffer (struct linebuffer *lb) 249{ 250 if (lb->buffer != NULL) 251 free (lb->buffer); 252} 253 254 255/* Process the input and produce the output. */ 256static void 257process (FILE *stream) 258{ 259 struct linebuffer lb; 260 const char *locale_code = locale_charset (); 261 bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0); 262#if HAVE_ICONV 263 iconv_t conv_to_utf8 = (iconv_t)(-1); 264 iconv_t conv_from_utf8 = (iconv_t)(-1); 265 char *last_utf8_line; 266 size_t last_utf8_line_len; 267 char *last_backconv_line; 268 size_t last_backconv_line_len; 269#endif 270 271 init_linebuffer (&lb); 272 273 /* Initialize the conversion descriptors. */ 274 if (need_code_conversion) 275 { 276#if HAVE_ICONV 277 /* Avoid glibc-2.1 bug with EUC-KR. */ 278# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION 279 if (strcmp (locale_code, "EUC-KR") != 0) 280# endif 281 { 282 conv_to_utf8 = iconv_open ("UTF-8", locale_code); 283 /* TODO: Maybe append //TRANSLIT here? */ 284 conv_from_utf8 = iconv_open (locale_code, "UTF-8"); 285 } 286 if (conv_to_utf8 == (iconv_t)(-1)) 287 error (EXIT_FAILURE, 0, _("\ 288Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \ 289and iconv() does not support this conversion."), 290 locale_code, "UTF-8", basename (program_name)); 291 if (conv_from_utf8 == (iconv_t)(-1)) 292 error (EXIT_FAILURE, 0, _("\ 293Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \ 294and iconv() does not support this conversion."), 295 "UTF-8", locale_code, basename (program_name)); 296 last_utf8_line = NULL; 297 last_utf8_line_len = 0; 298 last_backconv_line = NULL; 299 last_backconv_line_len = 0; 300#else 301 error (EXIT_FAILURE, 0, _("\ 302Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \ 303This version was built without iconv()."), 304 locale_code, "UTF-8", basename (program_name)); 305#endif 306 } 307 308 /* Read the input line by line. 309 Processing it character by character is not possible, because some 310 filters need to look at adjacent characters. Processing the entire file 311 in a whole chunk would take an excessive amount of memory. */ 312 for (;;) 313 { 314 char *line; 315 size_t line_len; 316 char *filtered_line; 317 size_t filtered_line_len; 318 319 /* Read a line. */ 320 if (read_linebuffer (&lb, stream) == NULL) 321 break; 322 line = lb.buffer; 323 line_len = lb.length; 324 /* read_linebuffer always returns a non-void result. */ 325 if (line_len == 0) 326 abort (); 327 328#if HAVE_ICONV 329 /* Convert it to UTF-8. */ 330 if (need_code_conversion) 331 { 332 char *utf8_line = last_utf8_line; 333 size_t utf8_line_len = last_utf8_line_len; 334 335 if (xmem_cd_iconv (line, line_len, conv_to_utf8, 336 &utf8_line, &utf8_line_len) != 0) 337 error (EXIT_FAILURE, errno, 338 _("input is not valid in \"%s\" encoding"), 339 locale_code); 340 if (utf8_line != last_utf8_line) 341 { 342 if (last_utf8_line != NULL) 343 free (last_utf8_line); 344 last_utf8_line = utf8_line; 345 last_utf8_line_len = utf8_line_len; 346 } 347 348 line = utf8_line; 349 line_len = utf8_line_len; 350 } 351#endif 352 353 /* Apply the filter. */ 354 serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len); 355 356#if HAVE_ICONV 357 /* Convert it back to the original encoding. */ 358 if (need_code_conversion) 359 { 360 char *backconv_line = last_backconv_line; 361 size_t backconv_line_len = last_backconv_line_len; 362 363 if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8, 364 &backconv_line, &backconv_line_len) != 0) 365 error (EXIT_FAILURE, errno, 366 _("error while converting from \"%s\" encoding to \"%s\" encoding"), 367 "UTF-8", locale_code); 368 if (backconv_line != last_backconv_line) 369 { 370 if (last_backconv_line != NULL) 371 free (last_backconv_line); 372 last_backconv_line = backconv_line; 373 last_backconv_line_len = backconv_line_len; 374 } 375 376 fwrite (backconv_line, 1, backconv_line_len, stdout); 377 } 378 else 379#endif 380 fwrite (filtered_line, 1, filtered_line_len, stdout); 381 382 free (filtered_line); 383 } 384 385#if HAVE_ICONV 386 if (need_code_conversion) 387 { 388 iconv_close (conv_from_utf8); 389 iconv_close (conv_to_utf8); 390 } 391#endif 392 393 destroy_linebuffer (&lb); 394} 395