1/* Edit translations using a subprocess.
2   Copyright (C) 2001-2005 Free Software Foundation, Inc.
3   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
18
19
20#ifdef HAVE_CONFIG_H
21# include "config.h"
22#endif
23
24#include <errno.h>
25#include <fcntl.h>
26#include <getopt.h>
27#include <limits.h>
28#include <locale.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <sys/types.h>
33
34#if HAVE_SYS_TIME_H
35# include <sys/time.h>
36#endif
37
38#ifdef HAVE_UNISTD_H
39# include <unistd.h>
40#elif defined _MSC_VER || defined __MINGW32__
41# include <io.h>
42#endif
43
44/* Get fd_set (on AIX) or select() declaration (on EMX).  */
45#if defined (_AIX) || defined (__EMX__)
46# include <sys/select.h>
47#endif
48
49#include "closeout.h"
50#include "dir-list.h"
51#include "error.h"
52#include "error-progname.h"
53#include "progname.h"
54#include "relocatable.h"
55#include "basename.h"
56#include "message.h"
57#include "read-po.h"
58#include "write-po.h"
59#include "msgl-charset.h"
60#include "xalloc.h"
61#include "exit.h"
62#include "findprog.h"
63#include "pipe.h"
64#include "wait-process.h"
65#include "gettext.h"
66
67#define _(str) gettext (str)
68
69
70/* We use a child process, and communicate through a bidirectional pipe.
71   To avoid deadlocks, let the child process decide when it wants to read
72   or to write, and let the parent behave accordingly.  The parent uses
73   select() to know whether it must write or read.  On platforms without
74   select(), we use non-blocking I/O.  (This means the parent is busy
75   looping while waiting for the child.  Not good.)  */
76
77/* On BeOS select() works only on sockets, not on normal file descriptors.  */
78#ifdef __BEOS__
79# undef HAVE_SELECT
80#endif
81
82
/* Force output of PO file even if empty.  Set to 1 by the --force-po long
   option and handed to msgdomain_list_print() by main().  */
static int force_po;

/* Keep the header entry unmodified.  Set to 1 by the --keep-header long
   option; process_message() then skips the message whose msgid is the
   empty string.  */
static int keep_header;

/* Name of the subprogram, exactly as given on the command line (the first
   non-option argument).  Used in diagnostics and for the PATH lookup.  */
static const char *sub_name;

/* Pathname of the subprogram, as returned by find_in_path (sub_name).  */
static const char *sub_path;

/* Argument list for the subprogram: SUB_ARGC arguments followed by a NULL
   terminator.  main() stores sub_path into element 0 before the filter is
   first started.  */
static char **sub_argv;
static int sub_argc;
98
/* Long options.
   Entries with a non-NULL flag pointer store the given value into that
   variable; main() handles them in its `case '\0'' branch and has nothing
   else to do.  Entries with a NULL flag pointer return the short option
   letter, or a code above CHAR_MAX for options that exist only in long
   form.  line_comment is not defined in this file; presumably it is
   declared by one of the included headers (TODO confirm).  */
static const struct option long_options[] =
{
  { "add-location", no_argument, &line_comment, 1 },
  { "directory", required_argument, NULL, 'D' },
  { "escape", no_argument, NULL, 'E' },
  { "force-po", no_argument, &force_po, 1 },
  { "help", no_argument, NULL, 'h' },
  { "indent", no_argument, NULL, CHAR_MAX + 1 },
  { "input", required_argument, NULL, 'i' },
  { "keep-header", no_argument, &keep_header, 1 },
  { "no-escape", no_argument, NULL, CHAR_MAX + 2 },
  { "no-location", no_argument, &line_comment, 0 },
  { "no-wrap", no_argument, NULL, CHAR_MAX + 3 },
  { "output-file", required_argument, NULL, 'o' },
  { "properties-input", no_argument, NULL, 'P' },
  { "properties-output", no_argument, NULL, 'p' },
  { "sort-by-file", no_argument, NULL, 'F' },
  { "sort-output", no_argument, NULL, 's' },
  { "strict", no_argument, NULL, 'S' },
  { "stringtable-input", no_argument, NULL, CHAR_MAX + 4 },
  { "stringtable-output", no_argument, NULL, CHAR_MAX + 5 },
  { "version", no_argument, NULL, 'V' },
  { "width", required_argument, NULL, 'w', },
  /* Terminating all-zero entry.  */
  { NULL, 0, NULL, 0 }
};
125
126
/* Forward declaration of local functions.  */
/* usage() prints either a short hint (STATUS != EXIT_SUCCESS) or the full
   help text and then calls exit (STATUS), hence the noreturn attribute on
   GCC versions that understand it (2.5 and newer).  */
static void usage (int status)
#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
	__attribute__ ((noreturn))
#endif
;
/* Passes every message of every domain in MDLP through the filter and
   returns MDLP itself.  */
static msgdomain_list_ty *process_msgdomain_list (msgdomain_list_ty *mdlp);
134
135
136int
137main (int argc, char **argv)
138{
139  int opt;
140  bool do_help;
141  bool do_version;
142  char *output_file;
143  const char *input_file;
144  msgdomain_list_ty *result;
145  bool sort_by_filepos = false;
146  bool sort_by_msgid = false;
147  size_t i;
148
149  /* Set program name for messages.  */
150  set_program_name (argv[0]);
151  error_print_progname = maybe_print_progname;
152
153#ifdef HAVE_SETLOCALE
154  /* Set locale via LC_ALL.  */
155  setlocale (LC_ALL, "");
156#endif
157
158  /* Set the text message domain.  */
159  bindtextdomain (PACKAGE, relocate (LOCALEDIR));
160  textdomain (PACKAGE);
161
162  /* Ensure that write errors on stdout are detected.  */
163  atexit (close_stdout);
164
165  /* Set default values for variables.  */
166  do_help = false;
167  do_version = false;
168  output_file = NULL;
169  input_file = NULL;
170
171  /* The '+' in the options string causes option parsing to terminate when
172     the first non-option, i.e. the subprogram name, is encountered.  */
173  while ((opt = getopt_long (argc, argv, "+D:EFhi:o:pPsVw:", long_options,
174			     NULL))
175	 != EOF)
176    switch (opt)
177      {
178      case '\0':		/* Long option.  */
179	break;
180
181      case 'D':
182	dir_list_append (optarg);
183	break;
184
185      case 'E':
186	message_print_style_escape (true);
187	break;
188
189      case 'F':
190	sort_by_filepos = true;
191	break;
192
193      case 'h':
194	do_help = true;
195	break;
196
197      case 'i':
198	if (input_file != NULL)
199	  {
200	    error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
201	    usage (EXIT_FAILURE);
202	  }
203	input_file = optarg;
204	break;
205
206      case 'o':
207	output_file = optarg;
208	break;
209
210      case 'p':
211	message_print_syntax_properties ();
212	break;
213
214      case 'P':
215	input_syntax = syntax_properties;
216	break;
217
218      case 's':
219	sort_by_msgid = true;
220	break;
221
222      case 'S':
223	message_print_style_uniforum ();
224	break;
225
226      case 'V':
227	do_version = true;
228	break;
229
230      case 'w':
231	{
232	  int value;
233	  char *endp;
234	  value = strtol (optarg, &endp, 10);
235	  if (endp != optarg)
236	    message_page_width_set (value);
237	}
238	break;
239
240      case CHAR_MAX + 1:
241	message_print_style_indent ();
242	break;
243
244      case CHAR_MAX + 2:
245	message_print_style_escape (false);
246	break;
247
248      case CHAR_MAX + 3: /* --no-wrap */
249	message_page_width_ignore ();
250	break;
251
252      case CHAR_MAX + 4: /* --stringtable-input */
253	input_syntax = syntax_stringtable;
254	break;
255
256      case CHAR_MAX + 5: /* --stringtable-output */
257	message_print_syntax_stringtable ();
258	break;
259
260      default:
261	usage (EXIT_FAILURE);
262	break;
263      }
264
265  /* Version information is requested.  */
266  if (do_version)
267    {
268      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
269      /* xgettext: no-wrap */
270      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
271This is free software; see the source for copying conditions.  There is NO\n\
272warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
273"),
274	      "2001-2005");
275      printf (_("Written by %s.\n"), "Bruno Haible");
276      exit (EXIT_SUCCESS);
277    }
278
279  /* Help is requested.  */
280  if (do_help)
281    usage (EXIT_SUCCESS);
282
283  /* Test for the subprogram name.  */
284  if (optind == argc)
285    error (EXIT_FAILURE, 0, _("missing filter name"));
286  sub_name = argv[optind];
287
288  /* Verify selected options.  */
289  if (!line_comment && sort_by_filepos)
290    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
291	   "--no-location", "--sort-by-file");
292
293  if (sort_by_msgid && sort_by_filepos)
294    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
295	   "--sort-output", "--sort-by-file");
296
297  /* Build argument list for the program.  */
298  sub_argc = argc - optind;
299  sub_argv = (char **) xmalloc ((sub_argc + 1) * sizeof (char *));
300  for (i = 0; i < sub_argc; i++)
301    sub_argv[i] = argv[optind + i];
302  sub_argv[i] = NULL;
303
304  /* Extra checks for sed scripts.  */
305  if (strcmp (sub_name, "sed") == 0)
306    {
307      if (sub_argc == 1)
308	error (EXIT_FAILURE, 0,
309	       _("at least one sed script must be specified"));
310
311      /* Replace GNU sed specific options with portable sed options.  */
312      for (i = 1; i < sub_argc; i++)
313	{
314	  if (strcmp (sub_argv[i], "--expression") == 0)
315	    sub_argv[i] = "-e";
316	  else if (strcmp (sub_argv[i], "--file") == 0)
317	    sub_argv[i] = "-f";
318	  else if (strcmp (sub_argv[i], "--quiet") == 0
319		   || strcmp (sub_argv[i], "--silent") == 0)
320	    sub_argv[i] = "-n";
321
322	  if (strcmp (sub_argv[i], "-e") == 0
323	      || strcmp (sub_argv[i], "-f") == 0)
324	    i++;
325	}
326    }
327
328  /* By default, input comes from standard input.  */
329  if (input_file == NULL)
330    input_file = "-";
331
332  /* Read input file.  */
333  result = read_po_file (input_file);
334
335  /* Warn if the current locale is not suitable for this PO file.  */
336  compare_po_locale_charsets (result);
337
338  /* Attempt to locate the program.
339     This is an optimization, to avoid that spawn/exec searches the PATH
340     on every call.  */
341  sub_path = find_in_path (sub_name);
342
343  /* Finish argument list for the program.  */
344  sub_argv[0] = (char *) sub_path;
345
346  /* Apply the subprogram.  */
347  result = process_msgdomain_list (result);
348
349  /* Sort the results.  */
350  if (sort_by_filepos)
351    msgdomain_list_sort_by_filepos (result);
352  else if (sort_by_msgid)
353    msgdomain_list_sort_by_msgid (result);
354
355  /* Write the merged message list out.  */
356  msgdomain_list_print (result, output_file, force_po, false);
357
358  exit (EXIT_SUCCESS);
359}
360
361
362/* Display usage information and exit.  */
363static void
364usage (int status)
365{
366  if (status != EXIT_SUCCESS)
367    fprintf (stderr, _("Try `%s --help' for more information.\n"),
368	     program_name);
369  else
370    {
371      printf (_("\
372Usage: %s [OPTION] FILTER [FILTER-OPTION]\n\
373"), program_name);
374      printf ("\n");
375      printf (_("\
376Applies a filter to all translations of a translation catalog.\n\
377"));
378      printf ("\n");
379      printf (_("\
380Mandatory arguments to long options are mandatory for short options too.\n"));
381      printf ("\n");
382      printf (_("\
383Input file location:\n"));
384      printf (_("\
385  -i, --input=INPUTFILE       input PO file\n"));
386      printf (_("\
387  -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
388      printf (_("\
389If no input file is given or if it is -, standard input is read.\n"));
390      printf ("\n");
391      printf (_("\
392Output file location:\n"));
393      printf (_("\
394  -o, --output-file=FILE      write output to specified file\n"));
395      printf (_("\
396The results are written to standard output if no output file is specified\n\
397or if it is -.\n"));
398      printf ("\n");
399      printf (_("\
400The FILTER can be any program that reads a translation from standard input\n\
401and writes a modified translation to standard output.\n\
402"));
403      printf ("\n");
404      printf (_("\
405Useful FILTER-OPTIONs when the FILTER is 'sed':\n"));
406      printf (_("\
407  -e, --expression=SCRIPT     add SCRIPT to the commands to be executed\n"));
408      printf (_("\
409  -f, --file=SCRIPTFILE       add the contents of SCRIPTFILE to the commands\n\
410                                to be executed\n"));
411      printf (_("\
412  -n, --quiet, --silent       suppress automatic printing of pattern space\n"));
413      printf ("\n");
414      printf (_("\
415Input file syntax:\n"));
416      printf (_("\
417  -P, --properties-input      input file is in Java .properties syntax\n"));
418      printf (_("\
419      --stringtable-input     input file is in NeXTstep/GNUstep .strings syntax\n"));
420      printf ("\n");
421      printf (_("\
422Output details:\n"));
423      printf (_("\
424      --no-escape             do not use C escapes in output (default)\n"));
425      printf (_("\
426  -E, --escape                use C escapes in output, no extended chars\n"));
427      printf (_("\
428      --force-po              write PO file even if empty\n"));
429      printf (_("\
430      --indent                indented output style\n"));
431      printf (_("\
432      --keep-header           keep header entry unmodified, don't filter it\n"));
433      printf (_("\
434      --no-location           suppress '#: filename:line' lines\n"));
435      printf (_("\
436      --add-location          preserve '#: filename:line' lines (default)\n"));
437      printf (_("\
438      --strict                strict Uniforum output style\n"));
439      printf (_("\
440  -p, --properties-output     write out a Java .properties file\n"));
441      printf (_("\
442      --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
443      printf (_("\
444  -w, --width=NUMBER          set output page width\n"));
445      printf (_("\
446      --no-wrap               do not break long message lines, longer than\n\
447                              the output page width, into several lines\n"));
448      printf (_("\
449  -s, --sort-output           generate sorted output\n"));
450      printf (_("\
451  -F, --sort-by-file          sort output by file location\n"));
452      printf ("\n");
453      printf (_("\
454Informative output:\n"));
455      printf (_("\
456  -h, --help                  display this help and exit\n"));
457      printf (_("\
458  -V, --version               output version information and exit\n"));
459      printf ("\n");
460      fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
461	     stdout);
462    }
463
464  exit (status);
465}
466
467
468#ifdef EINTR
469
470/* EINTR handling for close(), read(), write(), select().
471   These functions can return -1/EINTR even though we don't have any
472   signal handlers set up, namely when we get interrupted via SIGSTOP.  */
473
/* Like close(), but the call is repeated for as long as it fails with
   errno == EINTR.  Returns the result of the last close() call.  */
static inline int
nonintr_close (int fd)
{
  for (;;)
    {
      int status = close (fd);

      if (status >= 0 || errno != EINTR)
        return status;
    }
}
485#define close nonintr_close
486
/* Like read(), but the call is repeated for as long as it fails with
   errno == EINTR.  Returns the result of the last read() call.  */
static inline ssize_t
nonintr_read (int fd, void *buf, size_t count)
{
  for (;;)
    {
      ssize_t nbytes = read (fd, buf, count);

      if (nbytes >= 0 || errno != EINTR)
        return nbytes;
    }
}
498#define read nonintr_read
499
/* Like write(), but the call is repeated for as long as it fails with
   errno == EINTR.  Returns the result of the last write() call.  */
static inline ssize_t
nonintr_write (int fd, const void *buf, size_t count)
{
  for (;;)
    {
      ssize_t nbytes = write (fd, buf, count);

      if (nbytes >= 0 || errno != EINTR)
        return nbytes;
    }
}
511#undef write /* avoid warning on VMS */
512#define write nonintr_write
513
514# if HAVE_SELECT
515
/* Like select(), but the call is repeated for as long as it fails with
   errno == EINTR.  Returns the result of the last select() call.  */
static inline int
nonintr_select (int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
                struct timeval *timeout)
{
  for (;;)
    {
      int nready = select (n, readfds, writefds, exceptfds, timeout);

      if (nready >= 0 || errno != EINTR)
        return nready;
    }
}
528#undef select /* avoid warning on VMS */
529#define select nonintr_select
530
531# endif
532
533#endif
534
535
/* Non-blocking I/O.  */
/* Fall back to the O_NDELAY name where O_NONBLOCK is not defined.  */
#ifndef O_NONBLOCK
# define O_NONBLOCK O_NDELAY
#endif
/* IS_EAGAIN (errcode) tells whether a failed read() or write() only means
   that no data could be transferred right now.  When select() is in use
   the macro is constantly 0, so process_string() treats every such failure
   as fatal.  Without select() the descriptors are polled, and EAGAIN (as
   well as EWOULDBLOCK, where it is defined) is tolerated.  */
#if HAVE_SELECT
# define IS_EAGAIN(errcode) 0
#else
# ifdef EWOULDBLOCK
#  define IS_EAGAIN(errcode) ((errcode) == EAGAIN || (errcode) == EWOULDBLOCK)
# else
#  define IS_EAGAIN(errcode) ((errcode) == EAGAIN)
# endif
#endif
549
550/* Process a string STR of size LEN bytes through the subprogram, then
551   remove NUL bytes.
552   Store the freshly allocated result at *RESULTP and its length at *LENGTHP.
553 */
554static void
555process_string (const char *str, size_t len, char **resultp, size_t *lengthp)
556{
557#if defined _MSC_VER || defined __MINGW32__
558  /* Native Woe32 API.  */
559  /* Not yet implemented.  */
560  error (EXIT_FAILURE, 0, _("Not yet implemented."));
561#else
562  pid_t child;
563  int fd[2];
564  char *result;
565  size_t allocated;
566  size_t length;
567  int exitstatus;
568
569  /* Open a bidirectional pipe to a subprocess.  */
570  child = create_pipe_bidi (sub_name, sub_path, sub_argv, false, true, true,
571			    fd);
572
573  /* Enable non-blocking I/O.  This permits the read() and write() calls
574     to return -1/EAGAIN without blocking; this is important for polling
575     if HAVE_SELECT is not defined.  It also permits the read() and write()
576     calls to return after partial reads/writes; this is important if
577     HAVE_SELECT is defined, because select() only says that some data
578     can be read or written, not how many.  Without non-blocking I/O,
579     Linux 2.2.17 and BSD systems prefer to block instead of returning
580     with partial results.  */
581  {
582    int fcntl_flags;
583
584    if ((fcntl_flags = fcntl (fd[1], F_GETFL, 0)) < 0
585	|| fcntl (fd[1], F_SETFL, fcntl_flags | O_NONBLOCK) < 0
586	|| (fcntl_flags = fcntl (fd[0], F_GETFL, 0)) < 0
587	|| fcntl (fd[0], F_SETFL, fcntl_flags | O_NONBLOCK) < 0)
588      error (EXIT_FAILURE, errno,
589	     _("cannot set up nonblocking I/O to %s subprocess"), sub_name);
590  }
591
592  allocated = len + (len >> 2) + 1;
593  result = (char *) xmalloc (allocated);
594  length = 0;
595
596  for (;;)
597    {
598#if HAVE_SELECT
599      int n;
600      fd_set readfds;
601      fd_set writefds;
602
603      FD_ZERO (&readfds);
604      FD_SET (fd[0], &readfds);
605      n = fd[0] + 1;
606      if (str != NULL)
607	{
608	  FD_ZERO (&writefds);
609	  FD_SET (fd[1], &writefds);
610	  if (n <= fd[1])
611	    n = fd[1] + 1;
612	}
613
614      n = select (n, &readfds, (str != NULL ? &writefds : NULL), NULL, NULL);
615      if (n < 0)
616	error (EXIT_FAILURE, errno,
617	       _("communication with %s subprocess failed"), sub_name);
618      if (str != NULL && FD_ISSET (fd[1], &writefds))
619	goto try_write;
620      if (FD_ISSET (fd[0], &readfds))
621	goto try_read;
622      /* How could select() return if none of the two descriptors is ready?  */
623      abort ();
624#endif
625
626      /* Attempt to write.  */
627#if HAVE_SELECT
628    try_write:
629#endif
630      if (str != NULL)
631	{
632	  if (len > 0)
633	    {
634	      ssize_t nwritten = write (fd[1], str, len);
635	      if (nwritten < 0 && !IS_EAGAIN (errno))
636		error (EXIT_FAILURE, errno,
637		       _("write to %s subprocess failed"), sub_name);
638	      if (nwritten > 0)
639		{
640		  str += nwritten;
641		  len -= nwritten;
642		}
643	    }
644	  else
645	    {
646	      /* Tell the child there is nothing more the parent will send.  */
647	      close (fd[1]);
648	      str = NULL;
649	    }
650	}
651#if HAVE_SELECT
652      continue;
653#endif
654
655      /* Attempt to read.  */
656#if HAVE_SELECT
657    try_read:
658#endif
659      if (length == allocated)
660	{
661	  allocated = allocated + (allocated >> 1);
662	  result = (char *) xrealloc (result, allocated);
663	}
664      {
665	ssize_t nread = read (fd[0], result + length, allocated - length);
666	if (nread < 0 && !IS_EAGAIN (errno))
667	  error (EXIT_FAILURE, errno,
668		 _("read from %s subprocess failed"), sub_name);
669	if (nread > 0)
670	  length += nread;
671	if (nread == 0 && str == NULL)
672	  break;
673      }
674#if HAVE_SELECT
675      continue;
676#endif
677    }
678
679  close (fd[0]);
680
681  /* Remove zombie process from process list.  */
682  exitstatus = wait_subprocess (child, sub_name, false, false, true, true);
683  if (exitstatus != 0)
684    error (EXIT_FAILURE, 0, _("%s subprocess terminated with exit code %d"),
685	   sub_name, exitstatus);
686
687  /* Remove NUL bytes from result.  */
688  {
689    char *p = result;
690    char *pend = result + length;
691
692    for (; p < pend; p++)
693      if (*p == '\0')
694	{
695	  char *q;
696
697	  q = p;
698	  for (; p < pend; p++)
699	    if (*p != '\0')
700	      *q++ = *p;
701	  length = q - result;
702	  break;
703	}
704  }
705
706  *resultp = result;
707  *lengthp = length;
708#endif
709}
710
711
712static void
713process_message (message_ty *mp)
714{
715  const char *msgstr = mp->msgstr;
716  size_t msgstr_len = mp->msgstr_len;
717  size_t nsubstrings;
718  char **substrings;
719  size_t total_len;
720  char *total_str;
721  const char *p;
722  char *q;
723  size_t k;
724
725  /* Keep the header entry unmodified, if --keep-header was given.  */
726  if (mp->msgid[0] == '\0' && keep_header)
727    return;
728
729  /* Count NUL delimited substrings.  */
730  for (p = msgstr, nsubstrings = 0;
731       p < msgstr + msgstr_len;
732       p += strlen (p) + 1, nsubstrings++);
733
734  /* Process each NUL delimited substring separately.  */
735  substrings = (char **) xmalloc (nsubstrings * sizeof (char *));
736  for (p = msgstr, k = 0, total_len = 0; k < nsubstrings; k++)
737    {
738      char *result;
739      size_t length;
740
741      process_string (p, strlen (p), &result, &length);
742      result = (char *) xrealloc (result, length + 1);
743      result[length] = '\0';
744      substrings[k] = result;
745      total_len += length + 1;
746
747      p += strlen (p) + 1;
748    }
749
750  /* Concatenate the results, including the NUL after each.  */
751  total_str = (char *) xmalloc (total_len);
752  for (k = 0, q = total_str; k < nsubstrings; k++)
753    {
754      size_t length = strlen (substrings[k]);
755
756      memcpy (q, substrings[k], length + 1);
757      free (substrings[k]);
758      q += length + 1;
759    }
760  free (substrings);
761
762  mp->msgstr = total_str;
763  mp->msgstr_len = total_len;
764}
765
766
767static void
768process_message_list (message_list_ty *mlp)
769{
770  size_t j;
771
772  for (j = 0; j < mlp->nitems; j++)
773    process_message (mlp->item[j]);
774}
775
776
777static msgdomain_list_ty *
778process_msgdomain_list (msgdomain_list_ty *mdlp)
779{
780  size_t k;
781
782  for (k = 0; k < mdlp->nitems; k++)
783    process_message_list (mdlp->item[k]->messages);
784
785  return mdlp;
786}
787