1/* cat -- concatenate files and print on the standard output.
2   Copyright (C) 1988, 1990-1991, 1995-2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Differences from the Unix cat:
18   * Always unbuffered, -u is ignored.
19   * Usually much faster than other versions of cat, the difference
20   is especially apparent when using the -v option.
21
22   By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman.  */
23
24#include <config.h>
25
26#include <stdio.h>
27#include <getopt.h>
28#include <sys/types.h>
29
30#if HAVE_STROPTS_H
31# include <stropts.h>
32#endif
33#if HAVE_SYS_IOCTL_H
34# include <sys/ioctl.h>
35#endif
36
37#include "system.h"
38#include "error.h"
39#include "full-write.h"
40#include "quote.h"
41#include "safe-read.h"
42#include "xfreopen.h"
43
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "cat"

/* Author list handed to case_GETOPT_VERSION_CHAR in `main'.  */
#define AUTHORS \
  proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
  proper_name ("Richard M. Stallman")

/* Name of the file currently being copied.  May be "-".  */
static char const *infile;

/* File descriptor through which `infile' is read.  */
static int input_desc;

/* Text of the current line number, followed by a TAB and a NUL.
   The number is right-aligned: it sits just before the TAB and is
   preceded by blanks.

   An 11 digit counter may overflow within an hour on a P2/466,
   an 18 digit counter needs about 1000y */
#define LINE_COUNTER_BUF_LEN 20
static char line_buf[LINE_COUNTER_BUF_LEN] =
  {
    /* Seventeen blanks of left padding.  */
    ' ', ' ', ' ', ' ', ' ', ' ',
    ' ', ' ', ' ', ' ', ' ', ' ',
    ' ', ' ', ' ', ' ', ' ',
    /* Initial counter value, field separator, string terminator.  */
    '0', '\t', '\0'
  };

/* Where printing of `line_buf' begins.  It only moves (to the left)
   once the line count exceeds 999999.  */
static char *line_num_print = &line_buf[LINE_COUNTER_BUF_LEN - 8];

/* Most significant digit of the counter in `line_buf'.  */
static char *line_num_start = &line_buf[LINE_COUNTER_BUF_LEN - 3];

/* Least significant digit of the counter in `line_buf'.  */
static char *line_num_end = &line_buf[LINE_COUNTER_BUF_LEN - 3];

/* Carries the `cat' function's local `newlines' from one call to the
   next.  Static storage makes it start at zero.  */
static int newlines2;
80
81void
82usage (int status)
83{
84  if (status != EXIT_SUCCESS)
85    fprintf (stderr, _("Try `%s --help' for more information.\n"),
86             program_name);
87  else
88    {
89      printf (_("\
90Usage: %s [OPTION]... [FILE]...\n\
91"),
92              program_name);
93      fputs (_("\
94Concatenate FILE(s), or standard input, to standard output.\n\
95\n\
96  -A, --show-all           equivalent to -vET\n\
97  -b, --number-nonblank    number nonempty output lines\n\
98  -e                       equivalent to -vE\n\
99  -E, --show-ends          display $ at end of each line\n\
100  -n, --number             number all output lines\n\
101  -s, --squeeze-blank      suppress repeated empty output lines\n\
102"), stdout);
103      fputs (_("\
104  -t                       equivalent to -vT\n\
105  -T, --show-tabs          display TAB characters as ^I\n\
106  -u                       (ignored)\n\
107  -v, --show-nonprinting   use ^ and M- notation, except for LFD and TAB\n\
108"), stdout);
109      fputs (HELP_OPTION_DESCRIPTION, stdout);
110      fputs (VERSION_OPTION_DESCRIPTION, stdout);
111      fputs (_("\
112\n\
113With no FILE, or when FILE is -, read standard input.\n\
114"), stdout);
115      printf (_("\
116\n\
117Examples:\n\
118  %s f - g  Output f's contents, then standard input, then g's contents.\n\
119  %s        Copy standard input to standard output.\n\
120"),
121              program_name, program_name);
122      emit_ancillary_info ();
123    }
124  exit (status);
125}
126
127/* Compute the next line number.  */
128
129static void
130next_line_num (void)
131{
132  char *endp = line_num_end;
133  do
134    {
135      if ((*endp)++ < '9')
136        return;
137      *endp-- = '0';
138    }
139  while (endp >= line_num_start);
140  if (line_num_start > line_buf)
141    *--line_num_start = '1';
142  else
143    *line_buf = '>';
144  if (line_num_start < line_num_print)
145    line_num_print--;
146}
147
148/* Plain cat.  Copies the file behind `input_desc' to STDOUT_FILENO.
149   Return true if successful.  */
150
151static bool
152simple_cat (
153     /* Pointer to the buffer, used by reads and writes.  */
154     char *buf,
155
156     /* Number of characters preferably read or written by each read and write
157        call.  */
158     size_t bufsize)
159{
160  /* Actual number of characters read, and therefore written.  */
161  size_t n_read;
162
163  /* Loop until the end of the file.  */
164
165  for (;;)
166    {
167      /* Read a block of input.  */
168
169      n_read = safe_read (input_desc, buf, bufsize);
170      if (n_read == SAFE_READ_ERROR)
171        {
172          error (0, errno, "%s", infile);
173          return false;
174        }
175
176      /* End of this file?  */
177
178      if (n_read == 0)
179        return true;
180
181      /* Write this block out.  */
182
183      {
184        /* The following is ok, since we know that 0 < n_read.  */
185        size_t n = n_read;
186        if (full_write (STDOUT_FILENO, buf, n) != n)
187          error (EXIT_FAILURE, errno, _("write error"));
188      }
189    }
190}
191
/* Flush pending output to STDOUT_FILENO.
   The pending bytes are the *BPOUT - OUTBUF bytes starting at OUTBUF.
   If there were any, *BPOUT is reset to OUTBUF afterwards.
   A failed write terminates the program.  */

static inline void
write_pending (char *outbuf, char **bpout)
{
  size_t pending = *bpout - outbuf;

  /* Nothing buffered: *BPOUT already equals OUTBUF.  */
  if (pending == 0)
    return;

  if (full_write (STDOUT_FILENO, outbuf, pending) != pending)
    error (EXIT_FAILURE, errno, _("write error"));

  *bpout = outbuf;
}
207
208/* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC.
209   Return true if successful.
210   Called if any option more than -u was specified.
211
212   A newline character is always put at the end of the buffer, to make
213   an explicit test for buffer end unnecessary.  */
214
215static bool
216cat (
217     /* Pointer to the beginning of the input buffer.  */
218     char *inbuf,
219
220     /* Number of characters read in each read call.  */
221     size_t insize,
222
223     /* Pointer to the beginning of the output buffer.  */
224     char *outbuf,
225
226     /* Number of characters written by each write call.  */
227     size_t outsize,
228
229     /* Variables that have values according to the specified options.  */
230     bool show_nonprinting,
231     bool show_tabs,
232     bool number,
233     bool number_nonblank,
234     bool show_ends,
235     bool squeeze_blank)
236{
237  /* Last character read from the input buffer.  */
238  unsigned char ch;
239
240  /* Pointer to the next character in the input buffer.  */
241  char *bpin;
242
243  /* Pointer to the first non-valid byte in the input buffer, i.e. the
244     current end of the buffer.  */
245  char *eob;
246
247  /* Pointer to the position where the next character shall be written.  */
248  char *bpout;
249
250  /* Number of characters read by the last read call.  */
251  size_t n_read;
252
253  /* Determines how many consecutive newlines there have been in the
254     input.  0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1,
255     etc.  Initially 0 to indicate that we are at the beginning of a
256     new line.  The "state" of the procedure is determined by
257     NEWLINES.  */
258  int newlines = newlines2;
259
260#ifdef FIONREAD
261  /* If nonzero, use the FIONREAD ioctl, as an optimization.
262     (On Ultrix, it is not supported on NFS file systems.)  */
263  bool use_fionread = true;
264#endif
265
266  /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
267     is read immediately.  */
268
269  eob = inbuf;
270  bpin = eob + 1;
271
272  bpout = outbuf;
273
274  for (;;)
275    {
276      do
277        {
278          /* Write if there are at least OUTSIZE bytes in OUTBUF.  */
279
280          if (outbuf + outsize <= bpout)
281            {
282              char *wp = outbuf;
283              size_t remaining_bytes;
284              do
285                {
286                  if (full_write (STDOUT_FILENO, wp, outsize) != outsize)
287                    error (EXIT_FAILURE, errno, _("write error"));
288                  wp += outsize;
289                  remaining_bytes = bpout - wp;
290                }
291              while (outsize <= remaining_bytes);
292
293              /* Move the remaining bytes to the beginning of the
294                 buffer.  */
295
296              memmove (outbuf, wp, remaining_bytes);
297              bpout = outbuf + remaining_bytes;
298            }
299
300          /* Is INBUF empty?  */
301
302          if (bpin > eob)
303            {
304              bool input_pending = false;
305#ifdef FIONREAD
306              int n_to_read = 0;
307
308              /* Is there any input to read immediately?
309                 If not, we are about to wait,
310                 so write all buffered output before waiting.  */
311
312              if (use_fionread
313                  && ioctl (input_desc, FIONREAD, &n_to_read) < 0)
314                {
315                  /* Ultrix returns EOPNOTSUPP on NFS;
316                     HP-UX returns ENOTTY on pipes.
317                     SunOS returns EINVAL and
318                     More/BSD returns ENODEV on special files
319                     like /dev/null.
320                     Irix-5 returns ENOSYS on pipes.  */
321                  if (errno == EOPNOTSUPP || errno == ENOTTY
322                      || errno == EINVAL || errno == ENODEV
323                      || errno == ENOSYS)
324                    use_fionread = false;
325                  else
326                    {
327                      error (0, errno, _("cannot do ioctl on %s"), quote (infile));
328                      newlines2 = newlines;
329                      return false;
330                    }
331                }
332              if (n_to_read != 0)
333                input_pending = true;
334#endif
335
336              if (!input_pending)
337                write_pending (outbuf, &bpout);
338
339              /* Read more input into INBUF.  */
340
341              n_read = safe_read (input_desc, inbuf, insize);
342              if (n_read == SAFE_READ_ERROR)
343                {
344                  error (0, errno, "%s", infile);
345                  write_pending (outbuf, &bpout);
346                  newlines2 = newlines;
347                  return false;
348                }
349              if (n_read == 0)
350                {
351                  write_pending (outbuf, &bpout);
352                  newlines2 = newlines;
353                  return true;
354                }
355
356              /* Update the pointers and insert a sentinel at the buffer
357                 end.  */
358
359              bpin = inbuf;
360              eob = bpin + n_read;
361              *eob = '\n';
362            }
363          else
364            {
365              /* It was a real (not a sentinel) newline.  */
366
367              /* Was the last line empty?
368                 (i.e. have two or more consecutive newlines been read?)  */
369
370              if (++newlines > 0)
371                {
372                  if (newlines >= 2)
373                    {
374                      /* Limit this to 2 here.  Otherwise, with lots of
375                         consecutive newlines, the counter could wrap
376                         around at INT_MAX.  */
377                      newlines = 2;
378
379                      /* Are multiple adjacent empty lines to be substituted
380                         by single ditto (-s), and this was the second empty
381                         line?  */
382                      if (squeeze_blank)
383                        {
384                          ch = *bpin++;
385                          continue;
386                        }
387                    }
388
389                  /* Are line numbers to be written at empty lines (-n)?  */
390
391                  if (number && !number_nonblank)
392                    {
393                      next_line_num ();
394                      bpout = stpcpy (bpout, line_num_print);
395                    }
396                }
397
398              /* Output a currency symbol if requested (-e).  */
399
400              if (show_ends)
401                *bpout++ = '$';
402
403              /* Output the newline.  */
404
405              *bpout++ = '\n';
406            }
407          ch = *bpin++;
408        }
409      while (ch == '\n');
410
411      /* Are we at the beginning of a line, and line numbers are requested?  */
412
413      if (newlines >= 0 && number)
414        {
415          next_line_num ();
416          bpout = stpcpy (bpout, line_num_print);
417        }
418
419      /* Here CH cannot contain a newline character.  */
420
421      /* The loops below continue until a newline character is found,
422         which means that the buffer is empty or that a proper newline
423         has been found.  */
424
425      /* If quoting, i.e. at least one of -v, -e, or -t specified,
426         scan for chars that need conversion.  */
427      if (show_nonprinting)
428        {
429          for (;;)
430            {
431              if (ch >= 32)
432                {
433                  if (ch < 127)
434                    *bpout++ = ch;
435                  else if (ch == 127)
436                    {
437                      *bpout++ = '^';
438                      *bpout++ = '?';
439                    }
440                  else
441                    {
442                      *bpout++ = 'M';
443                      *bpout++ = '-';
444                      if (ch >= 128 + 32)
445                        {
446                          if (ch < 128 + 127)
447                            *bpout++ = ch - 128;
448                          else
449                            {
450                              *bpout++ = '^';
451                              *bpout++ = '?';
452                            }
453                        }
454                      else
455                        {
456                          *bpout++ = '^';
457                          *bpout++ = ch - 128 + 64;
458                        }
459                    }
460                }
461              else if (ch == '\t' && !show_tabs)
462                *bpout++ = '\t';
463              else if (ch == '\n')
464                {
465                  newlines = -1;
466                  break;
467                }
468              else
469                {
470                  *bpout++ = '^';
471                  *bpout++ = ch + 64;
472                }
473
474              ch = *bpin++;
475            }
476        }
477      else
478        {
479          /* Not quoting, neither of -v, -e, or -t specified.  */
480          for (;;)
481            {
482              if (ch == '\t' && show_tabs)
483                {
484                  *bpout++ = '^';
485                  *bpout++ = ch + 64;
486                }
487              else if (ch != '\n')
488                *bpout++ = ch;
489              else
490                {
491                  newlines = -1;
492                  break;
493                }
494
495              ch = *bpin++;
496            }
497        }
498    }
499}
500
501int
502main (int argc, char **argv)
503{
504  /* Optimal size of i/o operations of output.  */
505  size_t outsize;
506
507  /* Optimal size of i/o operations of input.  */
508  size_t insize;
509
510  size_t page_size = getpagesize ();
511
512  /* Pointer to the input buffer.  */
513  char *inbuf;
514
515  /* Pointer to the output buffer.  */
516  char *outbuf;
517
518  bool ok = true;
519  int c;
520
521  /* Index in argv to processed argument.  */
522  int argind;
523
524  /* Device number of the output (file or whatever).  */
525  dev_t out_dev;
526
527  /* I-node number of the output.  */
528  ino_t out_ino;
529
530  /* True if the output file should not be the same as any input file.  */
531  bool check_redirection = true;
532
533  /* Nonzero if we have ever read standard input.  */
534  bool have_read_stdin = false;
535
536  struct stat stat_buf;
537
538  /* Variables that are set according to the specified options.  */
539  bool number = false;
540  bool number_nonblank = false;
541  bool squeeze_blank = false;
542  bool show_ends = false;
543  bool show_nonprinting = false;
544  bool show_tabs = false;
545  int file_open_mode = O_RDONLY;
546
547  static struct option const long_options[] =
548  {
549    {"number-nonblank", no_argument, NULL, 'b'},
550    {"number", no_argument, NULL, 'n'},
551    {"squeeze-blank", no_argument, NULL, 's'},
552    {"show-nonprinting", no_argument, NULL, 'v'},
553    {"show-ends", no_argument, NULL, 'E'},
554    {"show-tabs", no_argument, NULL, 'T'},
555    {"show-all", no_argument, NULL, 'A'},
556    {GETOPT_HELP_OPTION_DECL},
557    {GETOPT_VERSION_OPTION_DECL},
558    {NULL, 0, NULL, 0}
559  };
560
561  initialize_main (&argc, &argv);
562  set_program_name (argv[0]);
563  setlocale (LC_ALL, "");
564  bindtextdomain (PACKAGE, LOCALEDIR);
565  textdomain (PACKAGE);
566
567  /* Arrange to close stdout if we exit via the
568     case_GETOPT_HELP_CHAR or case_GETOPT_VERSION_CHAR code.
569     Normally STDOUT_FILENO is used rather than stdout, so
570     close_stdout does nothing.  */
571  atexit (close_stdout);
572
573  /* Parse command line options.  */
574
575  while ((c = getopt_long (argc, argv, "benstuvAET", long_options, NULL))
576         != -1)
577    {
578      switch (c)
579        {
580        case 'b':
581          number = true;
582          number_nonblank = true;
583          break;
584
585        case 'e':
586          show_ends = true;
587          show_nonprinting = true;
588          break;
589
590        case 'n':
591          number = true;
592          break;
593
594        case 's':
595          squeeze_blank = true;
596          break;
597
598        case 't':
599          show_tabs = true;
600          show_nonprinting = true;
601          break;
602
603        case 'u':
604          /* We provide the -u feature unconditionally.  */
605          break;
606
607        case 'v':
608          show_nonprinting = true;
609          break;
610
611        case 'A':
612          show_nonprinting = true;
613          show_ends = true;
614          show_tabs = true;
615          break;
616
617        case 'E':
618          show_ends = true;
619          break;
620
621        case 'T':
622          show_tabs = true;
623          break;
624
625        case_GETOPT_HELP_CHAR;
626
627        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
628
629        default:
630          usage (EXIT_FAILURE);
631        }
632    }
633
634  /* Get device, i-node number, and optimal blocksize of output.  */
635
636  if (fstat (STDOUT_FILENO, &stat_buf) < 0)
637    error (EXIT_FAILURE, errno, _("standard output"));
638
639  outsize = io_blksize (stat_buf);
640  /* Input file can be output file for non-regular files.
641     fstat on pipes returns S_IFSOCK on some systems, S_IFIFO
642     on others, so the checking should not be done for those types,
643     and to allow things like cat < /dev/tty > /dev/tty, checking
644     is not done for device files either.  */
645
646  if (S_ISREG (stat_buf.st_mode))
647    {
648      out_dev = stat_buf.st_dev;
649      out_ino = stat_buf.st_ino;
650    }
651  else
652    {
653      check_redirection = false;
654#ifdef lint  /* Suppress `used before initialized' warning.  */
655      out_dev = 0;
656      out_ino = 0;
657#endif
658    }
659
660  if (! (number || show_ends || squeeze_blank))
661    {
662      file_open_mode |= O_BINARY;
663      if (O_BINARY && ! isatty (STDOUT_FILENO))
664        xfreopen (NULL, "wb", stdout);
665    }
666
667  /* Check if any of the input files are the same as the output file.  */
668
669  /* Main loop.  */
670
671  infile = "-";
672  argind = optind;
673
674  do
675    {
676      if (argind < argc)
677        infile = argv[argind];
678
679      if (STREQ (infile, "-"))
680        {
681          have_read_stdin = true;
682          input_desc = STDIN_FILENO;
683          if ((file_open_mode & O_BINARY) && ! isatty (STDIN_FILENO))
684            xfreopen (NULL, "rb", stdin);
685        }
686      else
687        {
688          input_desc = open (infile, file_open_mode);
689          if (input_desc < 0)
690            {
691              error (0, errno, "%s", infile);
692              ok = false;
693              continue;
694            }
695        }
696
697      if (fstat (input_desc, &stat_buf) < 0)
698        {
699          error (0, errno, "%s", infile);
700          ok = false;
701          goto contin;
702        }
703      insize = io_blksize (stat_buf);
704
705      /* Compare the device and i-node numbers of this input file with
706         the corresponding values of the (output file associated with)
707         stdout, and skip this input file if they coincide.  Input
708         files cannot be redirected to themselves.  */
709
710      if (check_redirection
711          && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino
712          && (input_desc != STDIN_FILENO))
713        {
714          error (0, 0, _("%s: input file is output file"), infile);
715          ok = false;
716          goto contin;
717        }
718
719      /* Select which version of `cat' to use.  If any format-oriented
720         options were given use `cat'; otherwise use `simple_cat'.  */
721
722      if (! (number || show_ends || show_nonprinting
723             || show_tabs || squeeze_blank))
724        {
725          insize = MAX (insize, outsize);
726          inbuf = xmalloc (insize + page_size - 1);
727
728          ok &= simple_cat (ptr_align (inbuf, page_size), insize);
729        }
730      else
731        {
732          inbuf = xmalloc (insize + 1 + page_size - 1);
733
734          /* Why are
735             (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN + PAGE_SIZE - 1)
736             bytes allocated for the output buffer?
737
738             A test whether output needs to be written is done when the input
739             buffer empties or when a newline appears in the input.  After
740             output is written, at most (OUTSIZE - 1) bytes will remain in the
741             buffer.  Now INSIZE bytes of input is read.  Each input character
742             may grow by a factor of 4 (by the prepending of M-^).  If all
743             characters do, and no newlines appear in this block of input, we
744             will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer.
745             If the last character in the preceding block of input was a
746             newline, a line number may be written (according to the given
747             options) as the first thing in the output buffer. (Done after the
748             new input is read, but before processing of the input begins.)
749             A line number requires seldom more than LINE_COUNTER_BUF_LEN
750             positions.
751
752             Align the output buffer to a page size boundary, for efficency on
753             some paging implementations, so add PAGE_SIZE - 1 bytes to the
754             request to make room for the alignment.  */
755
756          outbuf = xmalloc (outsize - 1 + insize * 4 + LINE_COUNTER_BUF_LEN
757                            + page_size - 1);
758
759          ok &= cat (ptr_align (inbuf, page_size), insize,
760                     ptr_align (outbuf, page_size), outsize, show_nonprinting,
761                     show_tabs, number, number_nonblank, show_ends,
762                     squeeze_blank);
763
764          free (outbuf);
765        }
766
767      free (inbuf);
768
769    contin:
770      if (!STREQ (infile, "-") && close (input_desc) < 0)
771        {
772          error (0, errno, "%s", infile);
773          ok = false;
774        }
775    }
776  while (++argind < argc);
777
778  if (have_read_stdin && close (STDIN_FILENO) < 0)
779    error (EXIT_FAILURE, errno, _("closing standard input"));
780
781  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
782}
783