1/* paste - merge lines of files
2   Copyright (C) 1997-2005, 2008-2010 Free Software Foundation, Inc.
3   Copyright (C) 1984 David M. Ihnat
4
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation, either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18/* Written by David Ihnat.  */
19
20/* The list of valid escape sequences has been expanded over the Unix
21   version, to include \b, \f, \r, and \v.
22
23   POSIX changes, bug fixes, long-named options, and cleanup
24   by David MacKenzie <djm@gnu.ai.mit.edu>.
25
26   Options:
27   --serial
28   -s				Paste one file at a time rather than
29                                one line from each file.
30   --delimiters=delim-list
31   -d delim-list		Consecutively use the characters in
32                                DELIM-LIST instead of tab to separate
33                                merged lines.  When DELIM-LIST is exhausted,
34                                start again at its beginning.
35   A FILE of `-' means standard input.
36   If no FILEs are given, standard input is used. */
37
38#include <config.h>
39
40#include <stdio.h>
41#include <getopt.h>
42#include <sys/types.h>
43#include "system.h"
44#include "error.h"
45#include "quotearg.h"
46
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "paste"

/* Author list handed to the version-option handler in main.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie")

/* Delimiter-list entry meaning "emit no delimiter at this position".
   collapse_escapes stores it for the `\0' escape.  */
#define EMPTY_DELIM '\0'

/* True once standard input has been selected as an input file.  */
static bool have_read_stdin;

/* True if all lines of one file are to be merged before moving on to
   the next file, rather than merging corresponding lines of the files
   in parallel.  */
static bool serial_merge;

/* The delimiters placed between merged lines (used cyclically).
   Set by collapse_escapes.  */
static char *delims;

/* One past the last delimiter stored in `delims'.  Because the list
   may itself contain EMPTY_DELIM bytes, this pointer (not a NUL
   terminator) marks the end of the list.  */
static char const *delim_end;
69
70static struct option const longopts[] =
71{
72  {"serial", no_argument, NULL, 's'},
73  {"delimiters", required_argument, NULL, 'd'},
74  {GETOPT_HELP_OPTION_DECL},
75  {GETOPT_VERSION_OPTION_DECL},
76  {NULL, 0, NULL, 0}
77};
78
79/* Set globals delims and delim_end.  Copy STRPTR to DELIMS, converting
80   backslash representations of special characters in STRPTR to their actual
81   values. The set of possible backslash characters has been expanded beyond
82   that recognized by the Unix version.
83   Return 0 upon success.
84   If the string ends in an odd number of backslashes, ignore the
85   final backslash and return nonzero.  */
86
87static int
88collapse_escapes (char const *strptr)
89{
90  char *strout = xstrdup (strptr);
91  bool backslash_at_end = false;
92
93  delims = strout;
94
95  while (*strptr)
96    {
97      if (*strptr != '\\')	/* Is it an escape character? */
98        *strout++ = *strptr++;	/* No, just transfer it. */
99      else
100        {
101          switch (*++strptr)
102            {
103            case '0':
104              *strout++ = EMPTY_DELIM;
105              break;
106
107            case 'b':
108              *strout++ = '\b';
109              break;
110
111            case 'f':
112              *strout++ = '\f';
113              break;
114
115            case 'n':
116              *strout++ = '\n';
117              break;
118
119            case 'r':
120              *strout++ = '\r';
121              break;
122
123            case 't':
124              *strout++ = '\t';
125              break;
126
127            case 'v':
128              *strout++ = '\v';
129              break;
130
131            case '\\':
132              *strout++ = '\\';
133              break;
134
135            case '\0':
136              backslash_at_end = true;
137              goto done;
138
139            default:
140              *strout++ = *strptr;
141              break;
142            }
143          strptr++;
144        }
145    }
146
147 done:;
148
149  delim_end = strout;
150  return backslash_at_end ? 1 : 0;
151}
152
153/* Report a write error and exit.  */
154
155static void write_error (void) ATTRIBUTE_NORETURN;
156static void
157write_error (void)
158{
159  error (EXIT_FAILURE, errno, _("write error"));
160  abort ();
161}
162
/* Write the byte C to standard output.  If putchar reports failure,
   hand off to write_error.  */

static inline void
xputchar (char c)
{
  int const result = putchar (c);

  if (result < 0)
    write_error ();
}
171
172/* Perform column paste on the NFILES files named in FNAMPTR.
173   Return true if successful, false if one or more files could not be
174   opened or read. */
175
176static bool
177paste_parallel (size_t nfiles, char **fnamptr)
178{
179  bool ok = true;
180  /* If all files are just ready to be closed, or will be on this
181     round, the string of delimiters must be preserved.
182     delbuf[0] through delbuf[nfiles]
183     store the delimiters for closed files. */
184  char *delbuf = xmalloc (nfiles + 2);
185
186  /* Streams open to the files to process; NULL if the corresponding
187     stream is closed.  */
188  FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr);
189
190  /* Number of files still open to process.  */
191  size_t files_open;
192
193  /* True if any fopen got fd == STDIN_FILENO.  */
194  bool opened_stdin = false;
195
196  /* Attempt to open all files.  This could be expanded to an infinite
197     number of files, but at the (considerable) expense of remembering
198     each file and its current offset, then opening/reading/closing.  */
199
200  for (files_open = 0; files_open < nfiles; ++files_open)
201    {
202      if (STREQ (fnamptr[files_open], "-"))
203        {
204          have_read_stdin = true;
205          fileptr[files_open] = stdin;
206        }
207      else
208        {
209          fileptr[files_open] = fopen (fnamptr[files_open], "r");
210          if (fileptr[files_open] == NULL)
211            error (EXIT_FAILURE, errno, "%s", fnamptr[files_open]);
212          else if (fileno (fileptr[files_open]) == STDIN_FILENO)
213            opened_stdin = true;
214        }
215    }
216
217  if (opened_stdin && have_read_stdin)
218    error (EXIT_FAILURE, 0, _("standard input is closed"));
219
220  /* Read a line from each file and output it to stdout separated by a
221     delimiter, until we go through the loop without successfully
222     reading from any of the files. */
223
224  while (files_open)
225    {
226      /* Set up for the next line. */
227      bool somedone = false;
228      char const *delimptr = delims;
229      size_t delims_saved = 0;	/* Number of delims saved in `delbuf'. */
230      size_t i;
231
232      for (i = 0; i < nfiles && files_open; i++)
233        {
234          int chr IF_LINT (= 0);	/* Input character. */
235          int err IF_LINT (= 0);	/* Input errno value.  */
236          size_t line_length = 0;	/* Number of chars in line. */
237
238          if (fileptr[i])
239            {
240              chr = getc (fileptr[i]);
241              err = errno;
242              if (chr != EOF && delims_saved)
243                {
244                  if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved)
245                    write_error ();
246                  delims_saved = 0;
247                }
248
249              while (chr != EOF)
250                {
251                  line_length++;
252                  if (chr == '\n')
253                    break;
254                  xputchar (chr);
255                  chr = getc (fileptr[i]);
256                  err = errno;
257                }
258            }
259
260          if (line_length == 0)
261            {
262              /* EOF, read error, or closed file.
263                 If an EOF or error, close the file.  */
264              if (fileptr[i])
265                {
266                  if (ferror (fileptr[i]))
267                    {
268                      error (0, err, "%s", fnamptr[i]);
269                      ok = false;
270                    }
271                  if (fileptr[i] == stdin)
272                    clearerr (fileptr[i]); /* Also clear EOF. */
273                  else if (fclose (fileptr[i]) == EOF)
274                    {
275                      error (0, errno, "%s", fnamptr[i]);
276                      ok = false;
277                    }
278
279                  fileptr[i] = NULL;
280                  files_open--;
281                }
282
283              if (i + 1 == nfiles)
284                {
285                  /* End of this output line.
286                     Is this the end of the whole thing? */
287                  if (somedone)
288                    {
289                      /* No.  Some files were not closed for this line. */
290                      if (delims_saved)
291                        {
292                          if (fwrite (delbuf, 1, delims_saved, stdout)
293                              != delims_saved)
294                            write_error ();
295                          delims_saved = 0;
296                        }
297                      xputchar ('\n');
298                    }
299                  continue;	/* Next read of files, or exit. */
300                }
301              else
302                {
303                  /* Closed file; add delimiter to `delbuf'. */
304                  if (*delimptr != EMPTY_DELIM)
305                    delbuf[delims_saved++] = *delimptr;
306                  if (++delimptr == delim_end)
307                    delimptr = delims;
308                }
309            }
310          else
311            {
312              /* Some data read. */
313              somedone = true;
314
315              /* Except for last file, replace last newline with delim. */
316              if (i + 1 != nfiles)
317                {
318                  if (chr != '\n' && chr != EOF)
319                    xputchar (chr);
320                  if (*delimptr != EMPTY_DELIM)
321                    xputchar (*delimptr);
322                  if (++delimptr == delim_end)
323                    delimptr = delims;
324                }
325              else
326                {
327                  /* If the last line of the last file lacks a newline,
328                     print one anyhow.  POSIX requires this.  */
329                  char c = (chr == EOF ? '\n' : chr);
330                  xputchar (c);
331                }
332            }
333        }
334    }
335  free (fileptr);
336  free (delbuf);
337  return ok;
338}
339
340/* Perform serial paste on the NFILES files named in FNAMPTR.
341   Return true if no errors, false if one or more files could not be
342   opened or read. */
343
344static bool
345paste_serial (size_t nfiles, char **fnamptr)
346{
347  bool ok = true;	/* false if open or read errors occur. */
348  int charnew, charold; /* Current and previous char read. */
349  char const *delimptr;	/* Current delimiter char. */
350  FILE *fileptr;	/* Open for reading current file. */
351
352  for (; nfiles; nfiles--, fnamptr++)
353    {
354      int saved_errno;
355      bool is_stdin = STREQ (*fnamptr, "-");
356      if (is_stdin)
357        {
358          have_read_stdin = true;
359          fileptr = stdin;
360        }
361      else
362        {
363          fileptr = fopen (*fnamptr, "r");
364          if (fileptr == NULL)
365            {
366              error (0, errno, "%s", *fnamptr);
367              ok = false;
368              continue;
369            }
370        }
371
372      delimptr = delims;	/* Set up for delimiter string. */
373
374      charold = getc (fileptr);
375      saved_errno = errno;
376      if (charold != EOF)
377        {
378          /* `charold' is set up.  Hit it!
379             Keep reading characters, stashing them in `charnew';
380             output `charold', converting to the appropriate delimiter
381             character if needed.  After the EOF, output `charold'
382             if it's a newline; otherwise, output it and then a newline. */
383
384          while ((charnew = getc (fileptr)) != EOF)
385            {
386              /* Process the old character. */
387              if (charold == '\n')
388                {
389                  if (*delimptr != EMPTY_DELIM)
390                    xputchar (*delimptr);
391
392                  if (++delimptr == delim_end)
393                    delimptr = delims;
394                }
395              else
396                xputchar (charold);
397
398              charold = charnew;
399            }
400          saved_errno = errno;
401
402          /* Hit EOF.  Process that last character. */
403          xputchar (charold);
404        }
405
406      if (charold != '\n')
407        xputchar ('\n');
408
409      if (ferror (fileptr))
410        {
411          error (0, saved_errno, "%s", *fnamptr);
412          ok = false;
413        }
414      if (is_stdin)
415        clearerr (fileptr);	/* Also clear EOF. */
416      else if (fclose (fileptr) == EOF)
417        {
418          error (0, errno, "%s", *fnamptr);
419          ok = false;
420        }
421    }
422  return ok;
423}
424
425void
426usage (int status)
427{
428  if (status != EXIT_SUCCESS)
429    fprintf (stderr, _("Try `%s --help' for more information.\n"),
430             program_name);
431  else
432    {
433      printf (_("\
434Usage: %s [OPTION]... [FILE]...\n\
435"),
436              program_name);
437      fputs (_("\
438Write lines consisting of the sequentially corresponding lines from\n\
439each FILE, separated by TABs, to standard output.\n\
440With no FILE, or when FILE is -, read standard input.\n\
441\n\
442"), stdout);
443      fputs (_("\
444Mandatory arguments to long options are mandatory for short options too.\n\
445"), stdout);
446      fputs (_("\
447  -d, --delimiters=LIST   reuse characters from LIST instead of TABs\n\
448  -s, --serial            paste one file at a time instead of in parallel\n\
449"), stdout);
450      fputs (HELP_OPTION_DESCRIPTION, stdout);
451      fputs (VERSION_OPTION_DESCRIPTION, stdout);
452      /* FIXME: add a couple of examples.  */
453      emit_ancillary_info ();
454    }
455  exit (status);
456}
457
458int
459main (int argc, char **argv)
460{
461  int optc;
462  bool ok;
463  char const *delim_arg = "\t";
464
465  initialize_main (&argc, &argv);
466  set_program_name (argv[0]);
467  setlocale (LC_ALL, "");
468  bindtextdomain (PACKAGE, LOCALEDIR);
469  textdomain (PACKAGE);
470
471  atexit (close_stdout);
472
473  have_read_stdin = false;
474  serial_merge = false;
475
476  while ((optc = getopt_long (argc, argv, "d:s", longopts, NULL)) != -1)
477    {
478      switch (optc)
479        {
480        case 'd':
481          /* Delimiter character(s). */
482          delim_arg = (optarg[0] == '\0' ? "\\0" : optarg);
483          break;
484
485        case 's':
486          serial_merge = true;
487          break;
488
489        case_GETOPT_HELP_CHAR;
490
491        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
492
493        default:
494          usage (EXIT_FAILURE);
495        }
496    }
497
498  if (optind == argc)
499    argv[argc++] = bad_cast ("-");
500
501  if (collapse_escapes (delim_arg))
502    {
503      /* Don't use the default quoting style, because that would double the
504         number of displayed backslashes, making the diagnostic look bogus.  */
505      set_quoting_style (NULL, escape_quoting_style);
506      error (EXIT_FAILURE, 0,
507             _("delimiter list ends with an unescaped backslash: %s"),
508             quotearg_colon (delim_arg));
509    }
510
511  if (!serial_merge)
512    ok = paste_parallel (argc - optind, &argv[optind]);
513  else
514    ok = paste_serial (argc - optind, &argv[optind]);
515
516  free (delims);
517
518  if (have_read_stdin && fclose (stdin) == EOF)
519    error (EXIT_FAILURE, errno, "-");
520  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
521}
522