1/* paste - merge lines of files 2 Copyright (C) 1997-2005, 2008-2010 Free Software Foundation, Inc. 3 Copyright (C) 1984 David M. Ihnat 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation, either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18/* Written by David Ihnat. */ 19 20/* The list of valid escape sequences has been expanded over the Unix 21 version, to include \b, \f, \r, and \v. 22 23 POSIX changes, bug fixes, long-named options, and cleanup 24 by David MacKenzie <djm@gnu.ai.mit.edu>. 25 26 Options: 27 --serial 28 -s Paste one file at a time rather than 29 one line from each file. 30 --delimiters=delim-list 31 -d delim-list Consecutively use the characters in 32 DELIM-LIST instead of tab to separate 33 merged lines. When DELIM-LIST is exhausted, 34 start again at its beginning. 35 A FILE of `-' means standard input. 36 If no FILEs are given, standard input is used. */ 37 38#include <config.h> 39 40#include <stdio.h> 41#include <getopt.h> 42#include <sys/types.h> 43#include "system.h" 44#include "error.h" 45#include "quotearg.h" 46 47/* The official name of this program (e.g., no `g' prefix). */ 48#define PROGRAM_NAME "paste" 49 50#define AUTHORS \ 51 proper_name ("David M. Ihnat"), \ 52 proper_name ("David MacKenzie") 53 54/* Indicates that no delimiter should be added in the current position. */ 55#define EMPTY_DELIM '\0' 56 57/* If nonzero, we have read standard input at some point. */ 58static bool have_read_stdin; 59 60/* If nonzero, merge subsequent lines of each file rather than 61 corresponding lines from each file in parallel. */ 62static bool serial_merge; 63 64/* The delimeters between lines of input files (used cyclically). */ 65static char *delims; 66 67/* A pointer to the character after the end of `delims'. */ 68static char const *delim_end; 69 70static struct option const longopts[] = 71{ 72 {"serial", no_argument, NULL, 's'}, 73 {"delimiters", required_argument, NULL, 'd'}, 74 {GETOPT_HELP_OPTION_DECL}, 75 {GETOPT_VERSION_OPTION_DECL}, 76 {NULL, 0, NULL, 0} 77}; 78 79/* Set globals delims and delim_end. Copy STRPTR to DELIMS, converting 80 backslash representations of special characters in STRPTR to their actual 81 values. The set of possible backslash characters has been expanded beyond 82 that recognized by the Unix version. 83 Return 0 upon success. 84 If the string ends in an odd number of backslashes, ignore the 85 final backslash and return nonzero. */ 86 87static int 88collapse_escapes (char const *strptr) 89{ 90 char *strout = xstrdup (strptr); 91 bool backslash_at_end = false; 92 93 delims = strout; 94 95 while (*strptr) 96 { 97 if (*strptr != '\\') /* Is it an escape character? */ 98 *strout++ = *strptr++; /* No, just transfer it. */ 99 else 100 { 101 switch (*++strptr) 102 { 103 case '0': 104 *strout++ = EMPTY_DELIM; 105 break; 106 107 case 'b': 108 *strout++ = '\b'; 109 break; 110 111 case 'f': 112 *strout++ = '\f'; 113 break; 114 115 case 'n': 116 *strout++ = '\n'; 117 break; 118 119 case 'r': 120 *strout++ = '\r'; 121 break; 122 123 case 't': 124 *strout++ = '\t'; 125 break; 126 127 case 'v': 128 *strout++ = '\v'; 129 break; 130 131 case '\\': 132 *strout++ = '\\'; 133 break; 134 135 case '\0': 136 backslash_at_end = true; 137 goto done; 138 139 default: 140 *strout++ = *strptr; 141 break; 142 } 143 strptr++; 144 } 145 } 146 147 done:; 148 149 delim_end = strout; 150 return backslash_at_end ? 1 : 0; 151} 152 153/* Report a write error and exit. */ 154 155static void write_error (void) ATTRIBUTE_NORETURN; 156static void 157write_error (void) 158{ 159 error (EXIT_FAILURE, errno, _("write error")); 160 abort (); 161} 162 163/* Output a single byte, reporting any write errors. */ 164 165static inline void 166xputchar (char c) 167{ 168 if (putchar (c) < 0) 169 write_error (); 170} 171 172/* Perform column paste on the NFILES files named in FNAMPTR. 173 Return true if successful, false if one or more files could not be 174 opened or read. */ 175 176static bool 177paste_parallel (size_t nfiles, char **fnamptr) 178{ 179 bool ok = true; 180 /* If all files are just ready to be closed, or will be on this 181 round, the string of delimiters must be preserved. 182 delbuf[0] through delbuf[nfiles] 183 store the delimiters for closed files. */ 184 char *delbuf = xmalloc (nfiles + 2); 185 186 /* Streams open to the files to process; NULL if the corresponding 187 stream is closed. */ 188 FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr); 189 190 /* Number of files still open to process. */ 191 size_t files_open; 192 193 /* True if any fopen got fd == STDIN_FILENO. */ 194 bool opened_stdin = false; 195 196 /* Attempt to open all files. This could be expanded to an infinite 197 number of files, but at the (considerable) expense of remembering 198 each file and its current offset, then opening/reading/closing. */ 199 200 for (files_open = 0; files_open < nfiles; ++files_open) 201 { 202 if (STREQ (fnamptr[files_open], "-")) 203 { 204 have_read_stdin = true; 205 fileptr[files_open] = stdin; 206 } 207 else 208 { 209 fileptr[files_open] = fopen (fnamptr[files_open], "r"); 210 if (fileptr[files_open] == NULL) 211 error (EXIT_FAILURE, errno, "%s", fnamptr[files_open]); 212 else if (fileno (fileptr[files_open]) == STDIN_FILENO) 213 opened_stdin = true; 214 } 215 } 216 217 if (opened_stdin && have_read_stdin) 218 error (EXIT_FAILURE, 0, _("standard input is closed")); 219 220 /* Read a line from each file and output it to stdout separated by a 221 delimiter, until we go through the loop without successfully 222 reading from any of the files. */ 223 224 while (files_open) 225 { 226 /* Set up for the next line. */ 227 bool somedone = false; 228 char const *delimptr = delims; 229 size_t delims_saved = 0; /* Number of delims saved in `delbuf'. */ 230 size_t i; 231 232 for (i = 0; i < nfiles && files_open; i++) 233 { 234 int chr IF_LINT (= 0); /* Input character. */ 235 int err IF_LINT (= 0); /* Input errno value. */ 236 size_t line_length = 0; /* Number of chars in line. */ 237 238 if (fileptr[i]) 239 { 240 chr = getc (fileptr[i]); 241 err = errno; 242 if (chr != EOF && delims_saved) 243 { 244 if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved) 245 write_error (); 246 delims_saved = 0; 247 } 248 249 while (chr != EOF) 250 { 251 line_length++; 252 if (chr == '\n') 253 break; 254 xputchar (chr); 255 chr = getc (fileptr[i]); 256 err = errno; 257 } 258 } 259 260 if (line_length == 0) 261 { 262 /* EOF, read error, or closed file. 263 If an EOF or error, close the file. */ 264 if (fileptr[i]) 265 { 266 if (ferror (fileptr[i])) 267 { 268 error (0, err, "%s", fnamptr[i]); 269 ok = false; 270 } 271 if (fileptr[i] == stdin) 272 clearerr (fileptr[i]); /* Also clear EOF. */ 273 else if (fclose (fileptr[i]) == EOF) 274 { 275 error (0, errno, "%s", fnamptr[i]); 276 ok = false; 277 } 278 279 fileptr[i] = NULL; 280 files_open--; 281 } 282 283 if (i + 1 == nfiles) 284 { 285 /* End of this output line. 286 Is this the end of the whole thing? */ 287 if (somedone) 288 { 289 /* No. Some files were not closed for this line. */ 290 if (delims_saved) 291 { 292 if (fwrite (delbuf, 1, delims_saved, stdout) 293 != delims_saved) 294 write_error (); 295 delims_saved = 0; 296 } 297 xputchar ('\n'); 298 } 299 continue; /* Next read of files, or exit. */ 300 } 301 else 302 { 303 /* Closed file; add delimiter to `delbuf'. */ 304 if (*delimptr != EMPTY_DELIM) 305 delbuf[delims_saved++] = *delimptr; 306 if (++delimptr == delim_end) 307 delimptr = delims; 308 } 309 } 310 else 311 { 312 /* Some data read. */ 313 somedone = true; 314 315 /* Except for last file, replace last newline with delim. */ 316 if (i + 1 != nfiles) 317 { 318 if (chr != '\n' && chr != EOF) 319 xputchar (chr); 320 if (*delimptr != EMPTY_DELIM) 321 xputchar (*delimptr); 322 if (++delimptr == delim_end) 323 delimptr = delims; 324 } 325 else 326 { 327 /* If the last line of the last file lacks a newline, 328 print one anyhow. POSIX requires this. */ 329 char c = (chr == EOF ? '\n' : chr); 330 xputchar (c); 331 } 332 } 333 } 334 } 335 free (fileptr); 336 free (delbuf); 337 return ok; 338} 339 340/* Perform serial paste on the NFILES files named in FNAMPTR. 341 Return true if no errors, false if one or more files could not be 342 opened or read. */ 343 344static bool 345paste_serial (size_t nfiles, char **fnamptr) 346{ 347 bool ok = true; /* false if open or read errors occur. */ 348 int charnew, charold; /* Current and previous char read. */ 349 char const *delimptr; /* Current delimiter char. */ 350 FILE *fileptr; /* Open for reading current file. */ 351 352 for (; nfiles; nfiles--, fnamptr++) 353 { 354 int saved_errno; 355 bool is_stdin = STREQ (*fnamptr, "-"); 356 if (is_stdin) 357 { 358 have_read_stdin = true; 359 fileptr = stdin; 360 } 361 else 362 { 363 fileptr = fopen (*fnamptr, "r"); 364 if (fileptr == NULL) 365 { 366 error (0, errno, "%s", *fnamptr); 367 ok = false; 368 continue; 369 } 370 } 371 372 delimptr = delims; /* Set up for delimiter string. */ 373 374 charold = getc (fileptr); 375 saved_errno = errno; 376 if (charold != EOF) 377 { 378 /* `charold' is set up. Hit it! 379 Keep reading characters, stashing them in `charnew'; 380 output `charold', converting to the appropriate delimiter 381 character if needed. After the EOF, output `charold' 382 if it's a newline; otherwise, output it and then a newline. */ 383 384 while ((charnew = getc (fileptr)) != EOF) 385 { 386 /* Process the old character. */ 387 if (charold == '\n') 388 { 389 if (*delimptr != EMPTY_DELIM) 390 xputchar (*delimptr); 391 392 if (++delimptr == delim_end) 393 delimptr = delims; 394 } 395 else 396 xputchar (charold); 397 398 charold = charnew; 399 } 400 saved_errno = errno; 401 402 /* Hit EOF. Process that last character. */ 403 xputchar (charold); 404 } 405 406 if (charold != '\n') 407 xputchar ('\n'); 408 409 if (ferror (fileptr)) 410 { 411 error (0, saved_errno, "%s", *fnamptr); 412 ok = false; 413 } 414 if (is_stdin) 415 clearerr (fileptr); /* Also clear EOF. */ 416 else if (fclose (fileptr) == EOF) 417 { 418 error (0, errno, "%s", *fnamptr); 419 ok = false; 420 } 421 } 422 return ok; 423} 424 425void 426usage (int status) 427{ 428 if (status != EXIT_SUCCESS) 429 fprintf (stderr, _("Try `%s --help' for more information.\n"), 430 program_name); 431 else 432 { 433 printf (_("\ 434Usage: %s [OPTION]... [FILE]...\n\ 435"), 436 program_name); 437 fputs (_("\ 438Write lines consisting of the sequentially corresponding lines from\n\ 439each FILE, separated by TABs, to standard output.\n\ 440With no FILE, or when FILE is -, read standard input.\n\ 441\n\ 442"), stdout); 443 fputs (_("\ 444Mandatory arguments to long options are mandatory for short options too.\n\ 445"), stdout); 446 fputs (_("\ 447 -d, --delimiters=LIST reuse characters from LIST instead of TABs\n\ 448 -s, --serial paste one file at a time instead of in parallel\n\ 449"), stdout); 450 fputs (HELP_OPTION_DESCRIPTION, stdout); 451 fputs (VERSION_OPTION_DESCRIPTION, stdout); 452 /* FIXME: add a couple of examples. */ 453 emit_ancillary_info (); 454 } 455 exit (status); 456} 457 458int 459main (int argc, char **argv) 460{ 461 int optc; 462 bool ok; 463 char const *delim_arg = "\t"; 464 465 initialize_main (&argc, &argv); 466 set_program_name (argv[0]); 467 setlocale (LC_ALL, ""); 468 bindtextdomain (PACKAGE, LOCALEDIR); 469 textdomain (PACKAGE); 470 471 atexit (close_stdout); 472 473 have_read_stdin = false; 474 serial_merge = false; 475 476 while ((optc = getopt_long (argc, argv, "d:s", longopts, NULL)) != -1) 477 { 478 switch (optc) 479 { 480 case 'd': 481 /* Delimiter character(s). */ 482 delim_arg = (optarg[0] == '\0' ? "\\0" : optarg); 483 break; 484 485 case 's': 486 serial_merge = true; 487 break; 488 489 case_GETOPT_HELP_CHAR; 490 491 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); 492 493 default: 494 usage (EXIT_FAILURE); 495 } 496 } 497 498 if (optind == argc) 499 argv[argc++] = bad_cast ("-"); 500 501 if (collapse_escapes (delim_arg)) 502 { 503 /* Don't use the default quoting style, because that would double the 504 number of displayed backslashes, making the diagnostic look bogus. */ 505 set_quoting_style (NULL, escape_quoting_style); 506 error (EXIT_FAILURE, 0, 507 _("delimiter list ends with an unescaped backslash: %s"), 508 quotearg_colon (delim_arg)); 509 } 510 511 if (!serial_merge) 512 ok = paste_parallel (argc - optind, &argv[optind]); 513 else 514 ok = paste_serial (argc - optind, &argv[optind]); 515 516 free (delims); 517 518 if (have_read_stdin && fclose (stdin) == EOF) 519 error (EXIT_FAILURE, errno, "-"); 520 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); 521} 522