1/* cat -- concatenate files and print on the standard output. 2 Copyright (C) 1988, 1990-1991, 1995-2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Differences from the Unix cat: 18 * Always unbuffered, -u is ignored. 19 * Usually much faster than other versions of cat, the difference 20 is especially apparent when using the -v option. 21 22 By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman. */ 23 24#include <config.h> 25 26#include <stdio.h> 27#include <getopt.h> 28#include <sys/types.h> 29 30#if HAVE_STROPTS_H 31# include <stropts.h> 32#endif 33#if HAVE_SYS_IOCTL_H 34# include <sys/ioctl.h> 35#endif 36 37#include "system.h" 38#include "error.h" 39#include "full-write.h" 40#include "quote.h" 41#include "safe-read.h" 42#include "xfreopen.h" 43 44/* The official name of this program (e.g., no `g' prefix). */ 45#define PROGRAM_NAME "cat" 46 47#define AUTHORS \ 48 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ 49 proper_name ("Richard M. Stallman") 50 51/* Name of input file. May be "-". */ 52static char const *infile; 53 54/* Descriptor on which input file is open. */ 55static int input_desc; 56 57/* Buffer for line numbers. 58 An 11 digit counter may overflow within an hour on a P2/466, 59 an 18 digit counter needs about 1000y */ 60#define LINE_COUNTER_BUF_LEN 20 61static char line_buf[LINE_COUNTER_BUF_LEN] = 62 { 63 ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 64 ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0', 65 '\t', '\0' 66 }; 67 68/* Position in `line_buf' where printing starts. This will not change 69 unless the number of lines is larger than 999999. */ 70static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8; 71 72/* Position of the first digit in `line_buf'. */ 73static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3; 74 75/* Position of the last digit in `line_buf'. */ 76static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3; 77 78/* Preserves the `cat' function's local `newlines' between invocations. */ 79static int newlines2 = 0; 80 81void 82usage (int status) 83{ 84 if (status != EXIT_SUCCESS) 85 fprintf (stderr, _("Try `%s --help' for more information.\n"), 86 program_name); 87 else 88 { 89 printf (_("\ 90Usage: %s [OPTION]... [FILE]...\n\ 91"), 92 program_name); 93 fputs (_("\ 94Concatenate FILE(s), or standard input, to standard output.\n\ 95\n\ 96 -A, --show-all equivalent to -vET\n\ 97 -b, --number-nonblank number nonempty output lines\n\ 98 -e equivalent to -vE\n\ 99 -E, --show-ends display $ at end of each line\n\ 100 -n, --number number all output lines\n\ 101 -s, --squeeze-blank suppress repeated empty output lines\n\ 102"), stdout); 103 fputs (_("\ 104 -t equivalent to -vT\n\ 105 -T, --show-tabs display TAB characters as ^I\n\ 106 -u (ignored)\n\ 107 -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB\n\ 108"), stdout); 109 fputs (HELP_OPTION_DESCRIPTION, stdout); 110 fputs (VERSION_OPTION_DESCRIPTION, stdout); 111 fputs (_("\ 112\n\ 113With no FILE, or when FILE is -, read standard input.\n\ 114"), stdout); 115 printf (_("\ 116\n\ 117Examples:\n\ 118 %s f - g Output f's contents, then standard input, then g's contents.\n\ 119 %s Copy standard input to standard output.\n\ 120"), 121 program_name, program_name); 122 emit_ancillary_info (); 123 } 124 exit (status); 125} 126 127/* Compute the next line number. */ 128 129static void 130next_line_num (void) 131{ 132 char *endp = line_num_end; 133 do 134 { 135 if ((*endp)++ < '9') 136 return; 137 *endp-- = '0'; 138 } 139 while (endp >= line_num_start); 140 if (line_num_start > line_buf) 141 *--line_num_start = '1'; 142 else 143 *line_buf = '>'; 144 if (line_num_start < line_num_print) 145 line_num_print--; 146} 147 148/* Plain cat. Copies the file behind `input_desc' to STDOUT_FILENO. 149 Return true if successful. */ 150 151static bool 152simple_cat ( 153 /* Pointer to the buffer, used by reads and writes. */ 154 char *buf, 155 156 /* Number of characters preferably read or written by each read and write 157 call. */ 158 size_t bufsize) 159{ 160 /* Actual number of characters read, and therefore written. */ 161 size_t n_read; 162 163 /* Loop until the end of the file. */ 164 165 for (;;) 166 { 167 /* Read a block of input. */ 168 169 n_read = safe_read (input_desc, buf, bufsize); 170 if (n_read == SAFE_READ_ERROR) 171 { 172 error (0, errno, "%s", infile); 173 return false; 174 } 175 176 /* End of this file? */ 177 178 if (n_read == 0) 179 return true; 180 181 /* Write this block out. */ 182 183 { 184 /* The following is ok, since we know that 0 < n_read. */ 185 size_t n = n_read; 186 if (full_write (STDOUT_FILENO, buf, n) != n) 187 error (EXIT_FAILURE, errno, _("write error")); 188 } 189 } 190} 191 192/* Write any pending output to STDOUT_FILENO. 193 Pending is defined to be the *BPOUT - OUTBUF bytes starting at OUTBUF. 194 Then set *BPOUT to OUTPUT if it's not already that value. */ 195 196static inline void 197write_pending (char *outbuf, char **bpout) 198{ 199 size_t n_write = *bpout - outbuf; 200 if (0 < n_write) 201 { 202 if (full_write (STDOUT_FILENO, outbuf, n_write) != n_write) 203 error (EXIT_FAILURE, errno, _("write error")); 204 *bpout = outbuf; 205 } 206} 207 208/* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC. 209 Return true if successful. 210 Called if any option more than -u was specified. 211 212 A newline character is always put at the end of the buffer, to make 213 an explicit test for buffer end unnecessary. */ 214 215static bool 216cat ( 217 /* Pointer to the beginning of the input buffer. */ 218 char *inbuf, 219 220 /* Number of characters read in each read call. */ 221 size_t insize, 222 223 /* Pointer to the beginning of the output buffer. */ 224 char *outbuf, 225 226 /* Number of characters written by each write call. */ 227 size_t outsize, 228 229 /* Variables that have values according to the specified options. */ 230 bool show_nonprinting, 231 bool show_tabs, 232 bool number, 233 bool number_nonblank, 234 bool show_ends, 235 bool squeeze_blank) 236{ 237 /* Last character read from the input buffer. */ 238 unsigned char ch; 239 240 /* Pointer to the next character in the input buffer. */ 241 char *bpin; 242 243 /* Pointer to the first non-valid byte in the input buffer, i.e. the 244 current end of the buffer. */ 245 char *eob; 246 247 /* Pointer to the position where the next character shall be written. */ 248 char *bpout; 249 250 /* Number of characters read by the last read call. */ 251 size_t n_read; 252 253 /* Determines how many consecutive newlines there have been in the 254 input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1, 255 etc. Initially 0 to indicate that we are at the beginning of a 256 new line. The "state" of the procedure is determined by 257 NEWLINES. */ 258 int newlines = newlines2; 259 260#ifdef FIONREAD 261 /* If nonzero, use the FIONREAD ioctl, as an optimization. 262 (On Ultrix, it is not supported on NFS file systems.) */ 263 bool use_fionread = true; 264#endif 265 266 /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input 267 is read immediately. */ 268 269 eob = inbuf; 270 bpin = eob + 1; 271 272 bpout = outbuf; 273 274 for (;;) 275 { 276 do 277 { 278 /* Write if there are at least OUTSIZE bytes in OUTBUF. */ 279 280 if (outbuf + outsize <= bpout) 281 { 282 char *wp = outbuf; 283 size_t remaining_bytes; 284 do 285 { 286 if (full_write (STDOUT_FILENO, wp, outsize) != outsize) 287 error (EXIT_FAILURE, errno, _("write error")); 288 wp += outsize; 289 remaining_bytes = bpout - wp; 290 } 291 while (outsize <= remaining_bytes); 292 293 /* Move the remaining bytes to the beginning of the 294 buffer. */ 295 296 memmove (outbuf, wp, remaining_bytes); 297 bpout = outbuf + remaining_bytes; 298 } 299 300 /* Is INBUF empty? */ 301 302 if (bpin > eob) 303 { 304 bool input_pending = false; 305#ifdef FIONREAD 306 int n_to_read = 0; 307 308 /* Is there any input to read immediately? 309 If not, we are about to wait, 310 so write all buffered output before waiting. */ 311 312 if (use_fionread 313 && ioctl (input_desc, FIONREAD, &n_to_read) < 0) 314 { 315 /* Ultrix returns EOPNOTSUPP on NFS; 316 HP-UX returns ENOTTY on pipes. 317 SunOS returns EINVAL and 318 More/BSD returns ENODEV on special files 319 like /dev/null. 320 Irix-5 returns ENOSYS on pipes. */ 321 if (errno == EOPNOTSUPP || errno == ENOTTY 322 || errno == EINVAL || errno == ENODEV 323 || errno == ENOSYS) 324 use_fionread = false; 325 else 326 { 327 error (0, errno, _("cannot do ioctl on %s"), quote (infile)); 328 newlines2 = newlines; 329 return false; 330 } 331 } 332 if (n_to_read != 0) 333 input_pending = true; 334#endif 335 336 if (!input_pending) 337 write_pending (outbuf, &bpout); 338 339 /* Read more input into INBUF. */ 340 341 n_read = safe_read (input_desc, inbuf, insize); 342 if (n_read == SAFE_READ_ERROR) 343 { 344 error (0, errno, "%s", infile); 345 write_pending (outbuf, &bpout); 346 newlines2 = newlines; 347 return false; 348 } 349 if (n_read == 0) 350 { 351 write_pending (outbuf, &bpout); 352 newlines2 = newlines; 353 return true; 354 } 355 356 /* Update the pointers and insert a sentinel at the buffer 357 end. */ 358 359 bpin = inbuf; 360 eob = bpin + n_read; 361 *eob = '\n'; 362 } 363 else 364 { 365 /* It was a real (not a sentinel) newline. */ 366 367 /* Was the last line empty? 368 (i.e. have two or more consecutive newlines been read?) */ 369 370 if (++newlines > 0) 371 { 372 if (newlines >= 2) 373 { 374 /* Limit this to 2 here. Otherwise, with lots of 375 consecutive newlines, the counter could wrap 376 around at INT_MAX. */ 377 newlines = 2; 378 379 /* Are multiple adjacent empty lines to be substituted 380 by single ditto (-s), and this was the second empty 381 line? */ 382 if (squeeze_blank) 383 { 384 ch = *bpin++; 385 continue; 386 } 387 } 388 389 /* Are line numbers to be written at empty lines (-n)? */ 390 391 if (number && !number_nonblank) 392 { 393 next_line_num (); 394 bpout = stpcpy (bpout, line_num_print); 395 } 396 } 397 398 /* Output a currency symbol if requested (-e). */ 399 400 if (show_ends) 401 *bpout++ = '$'; 402 403 /* Output the newline. */ 404 405 *bpout++ = '\n'; 406 } 407 ch = *bpin++; 408 } 409 while (ch == '\n'); 410 411 /* Are we at the beginning of a line, and line numbers are requested? */ 412 413 if (newlines >= 0 && number) 414 { 415 next_line_num (); 416 bpout = stpcpy (bpout, line_num_print); 417 } 418 419 /* Here CH cannot contain a newline character. */ 420 421 /* The loops below continue until a newline character is found, 422 which means that the buffer is empty or that a proper newline 423 has been found. */ 424 425 /* If quoting, i.e. at least one of -v, -e, or -t specified, 426 scan for chars that need conversion. */ 427 if (show_nonprinting) 428 { 429 for (;;) 430 { 431 if (ch >= 32) 432 { 433 if (ch < 127) 434 *bpout++ = ch; 435 else if (ch == 127) 436 { 437 *bpout++ = '^'; 438 *bpout++ = '?'; 439 } 440 else 441 { 442 *bpout++ = 'M'; 443 *bpout++ = '-'; 444 if (ch >= 128 + 32) 445 { 446 if (ch < 128 + 127) 447 *bpout++ = ch - 128; 448 else 449 { 450 *bpout++ = '^'; 451 *bpout++ = '?'; 452 } 453 } 454 else 455 { 456 *bpout++ = '^'; 457 *bpout++ = ch - 128 + 64; 458 } 459 } 460 } 461 else if (ch == '\t' && !show_tabs) 462 *bpout++ = '\t'; 463 else if (ch == '\n') 464 { 465 newlines = -1; 466 break; 467 } 468 else 469 { 470 *bpout++ = '^'; 471 *bpout++ = ch + 64; 472 } 473 474 ch = *bpin++; 475 } 476 } 477 else 478 { 479 /* Not quoting, neither of -v, -e, or -t specified. */ 480 for (;;) 481 { 482 if (ch == '\t' && show_tabs) 483 { 484 *bpout++ = '^'; 485 *bpout++ = ch + 64; 486 } 487 else if (ch != '\n') 488 *bpout++ = ch; 489 else 490 { 491 newlines = -1; 492 break; 493 } 494 495 ch = *bpin++; 496 } 497 } 498 } 499} 500 501int 502main (int argc, char **argv) 503{ 504 /* Optimal size of i/o operations of output. */ 505 size_t outsize; 506 507 /* Optimal size of i/o operations of input. */ 508 size_t insize; 509 510 size_t page_size = getpagesize (); 511 512 /* Pointer to the input buffer. */ 513 char *inbuf; 514 515 /* Pointer to the output buffer. */ 516 char *outbuf; 517 518 bool ok = true; 519 int c; 520 521 /* Index in argv to processed argument. */ 522 int argind; 523 524 /* Device number of the output (file or whatever). */ 525 dev_t out_dev; 526 527 /* I-node number of the output. */ 528 ino_t out_ino; 529 530 /* True if the output file should not be the same as any input file. */ 531 bool check_redirection = true; 532 533 /* Nonzero if we have ever read standard input. */ 534 bool have_read_stdin = false; 535 536 struct stat stat_buf; 537 538 /* Variables that are set according to the specified options. */ 539 bool number = false; 540 bool number_nonblank = false; 541 bool squeeze_blank = false; 542 bool show_ends = false; 543 bool show_nonprinting = false; 544 bool show_tabs = false; 545 int file_open_mode = O_RDONLY; 546 547 static struct option const long_options[] = 548 { 549 {"number-nonblank", no_argument, NULL, 'b'}, 550 {"number", no_argument, NULL, 'n'}, 551 {"squeeze-blank", no_argument, NULL, 's'}, 552 {"show-nonprinting", no_argument, NULL, 'v'}, 553 {"show-ends", no_argument, NULL, 'E'}, 554 {"show-tabs", no_argument, NULL, 'T'}, 555 {"show-all", no_argument, NULL, 'A'}, 556 {GETOPT_HELP_OPTION_DECL}, 557 {GETOPT_VERSION_OPTION_DECL}, 558 {NULL, 0, NULL, 0} 559 }; 560 561 initialize_main (&argc, &argv); 562 set_program_name (argv[0]); 563 setlocale (LC_ALL, ""); 564 bindtextdomain (PACKAGE, LOCALEDIR); 565 textdomain (PACKAGE); 566 567 /* Arrange to close stdout if we exit via the 568 case_GETOPT_HELP_CHAR or case_GETOPT_VERSION_CHAR code. 569 Normally STDOUT_FILENO is used rather than stdout, so 570 close_stdout does nothing. */ 571 atexit (close_stdout); 572 573 /* Parse command line options. */ 574 575 while ((c = getopt_long (argc, argv, "benstuvAET", long_options, NULL)) 576 != -1) 577 { 578 switch (c) 579 { 580 case 'b': 581 number = true; 582 number_nonblank = true; 583 break; 584 585 case 'e': 586 show_ends = true; 587 show_nonprinting = true; 588 break; 589 590 case 'n': 591 number = true; 592 break; 593 594 case 's': 595 squeeze_blank = true; 596 break; 597 598 case 't': 599 show_tabs = true; 600 show_nonprinting = true; 601 break; 602 603 case 'u': 604 /* We provide the -u feature unconditionally. */ 605 break; 606 607 case 'v': 608 show_nonprinting = true; 609 break; 610 611 case 'A': 612 show_nonprinting = true; 613 show_ends = true; 614 show_tabs = true; 615 break; 616 617 case 'E': 618 show_ends = true; 619 break; 620 621 case 'T': 622 show_tabs = true; 623 break; 624 625 case_GETOPT_HELP_CHAR; 626 627 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); 628 629 default: 630 usage (EXIT_FAILURE); 631 } 632 } 633 634 /* Get device, i-node number, and optimal blocksize of output. */ 635 636 if (fstat (STDOUT_FILENO, &stat_buf) < 0) 637 error (EXIT_FAILURE, errno, _("standard output")); 638 639 outsize = io_blksize (stat_buf); 640 /* Input file can be output file for non-regular files. 641 fstat on pipes returns S_IFSOCK on some systems, S_IFIFO 642 on others, so the checking should not be done for those types, 643 and to allow things like cat < /dev/tty > /dev/tty, checking 644 is not done for device files either. */ 645 646 if (S_ISREG (stat_buf.st_mode)) 647 { 648 out_dev = stat_buf.st_dev; 649 out_ino = stat_buf.st_ino; 650 } 651 else 652 { 653 check_redirection = false; 654#ifdef lint /* Suppress `used before initialized' warning. */ 655 out_dev = 0; 656 out_ino = 0; 657#endif 658 } 659 660 if (! (number || show_ends || squeeze_blank)) 661 { 662 file_open_mode |= O_BINARY; 663 if (O_BINARY && ! isatty (STDOUT_FILENO)) 664 xfreopen (NULL, "wb", stdout); 665 } 666 667 /* Check if any of the input files are the same as the output file. */ 668 669 /* Main loop. */ 670 671 infile = "-"; 672 argind = optind; 673 674 do 675 { 676 if (argind < argc) 677 infile = argv[argind]; 678 679 if (STREQ (infile, "-")) 680 { 681 have_read_stdin = true; 682 input_desc = STDIN_FILENO; 683 if ((file_open_mode & O_BINARY) && ! isatty (STDIN_FILENO)) 684 xfreopen (NULL, "rb", stdin); 685 } 686 else 687 { 688 input_desc = open (infile, file_open_mode); 689 if (input_desc < 0) 690 { 691 error (0, errno, "%s", infile); 692 ok = false; 693 continue; 694 } 695 } 696 697 if (fstat (input_desc, &stat_buf) < 0) 698 { 699 error (0, errno, "%s", infile); 700 ok = false; 701 goto contin; 702 } 703 insize = io_blksize (stat_buf); 704 705 /* Compare the device and i-node numbers of this input file with 706 the corresponding values of the (output file associated with) 707 stdout, and skip this input file if they coincide. Input 708 files cannot be redirected to themselves. */ 709 710 if (check_redirection 711 && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino 712 && (input_desc != STDIN_FILENO)) 713 { 714 error (0, 0, _("%s: input file is output file"), infile); 715 ok = false; 716 goto contin; 717 } 718 719 /* Select which version of `cat' to use. If any format-oriented 720 options were given use `cat'; otherwise use `simple_cat'. */ 721 722 if (! (number || show_ends || show_nonprinting 723 || show_tabs || squeeze_blank)) 724 { 725 insize = MAX (insize, outsize); 726 inbuf = xmalloc (insize + page_size - 1); 727 728 ok &= simple_cat (ptr_align (inbuf, page_size), insize); 729 } 730 else 731 { 732 inbuf = xmalloc (insize + 1 + page_size - 1); 733 734 /* Why are 735 (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN + PAGE_SIZE - 1) 736 bytes allocated for the output buffer? 737 738 A test whether output needs to be written is done when the input 739 buffer empties or when a newline appears in the input. After 740 output is written, at most (OUTSIZE - 1) bytes will remain in the 741 buffer. Now INSIZE bytes of input is read. Each input character 742 may grow by a factor of 4 (by the prepending of M-^). If all 743 characters do, and no newlines appear in this block of input, we 744 will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer. 745 If the last character in the preceding block of input was a 746 newline, a line number may be written (according to the given 747 options) as the first thing in the output buffer. (Done after the 748 new input is read, but before processing of the input begins.) 749 A line number requires seldom more than LINE_COUNTER_BUF_LEN 750 positions. 751 752 Align the output buffer to a page size boundary, for efficency on 753 some paging implementations, so add PAGE_SIZE - 1 bytes to the 754 request to make room for the alignment. */ 755 756 outbuf = xmalloc (outsize - 1 + insize * 4 + LINE_COUNTER_BUF_LEN 757 + page_size - 1); 758 759 ok &= cat (ptr_align (inbuf, page_size), insize, 760 ptr_align (outbuf, page_size), outsize, show_nonprinting, 761 show_tabs, number, number_nonblank, show_ends, 762 squeeze_blank); 763 764 free (outbuf); 765 } 766 767 free (inbuf); 768 769 contin: 770 if (!STREQ (infile, "-") && close (input_desc) < 0) 771 { 772 error (0, errno, "%s", infile); 773 ok = false; 774 } 775 } 776 while (++argind < argc); 777 778 if (have_read_stdin && close (STDIN_FILENO) < 0) 779 error (EXIT_FAILURE, errno, _("closing standard input")); 780 781 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); 782} 783