1/* tac - concatenate and print files in reverse
2   Copyright (C) 1988-1991, 1995-2006, 2008-2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Jay Lepreau (lepreau@cs.utah.edu).
18   GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
19
20/* Copy each FILE, or the standard input if none are given or when a
21   FILE name of "-" is encountered, to the standard output with the
22   order of the records reversed.  The records are separated by
23   instances of a string, or a newline if none is given.  By default, the
24   separator string is attached to the end of the record that it
25   follows in the file.
26
27   Options:
28   -b, --before			The separator is attached to the beginning
29                                of the record that it precedes in the file.
30   -r, --regex			The separator is a regular expression.
31   -s, --separator=separator	Use SEPARATOR as the record separator.
32
33   To reverse a file byte by byte, use (in bash, ksh, or sh):
34tac -r -s '.\|
35' file */
36
37#include <config.h>
38
39#include <stdio.h>
40#include <getopt.h>
41#include <sys/types.h>
42#include "system.h"
43
44#include <regex.h>
45
46#include "error.h"
47#include "quote.h"
48#include "quotearg.h"
49#include "safe-read.h"
50#include "stdlib--.h"
51#include "xfreopen.h"
52
53/* The official name of this program (e.g., no `g' prefix).  */
54#define PROGRAM_NAME "tac"
55
56#define AUTHORS \
57  proper_name ("Jay Lepreau"), \
58  proper_name ("David MacKenzie")
59
60#if defined __MSDOS__ || defined _WIN32
61/* Define this to non-zero on systems for which the regular mechanism
62   (of unlinking an open file and expecting to be able to write, seek
63   back to the beginning, then reread it) doesn't work.  E.g., on Windows
64   and DOS systems.  */
65# define DONT_UNLINK_WHILE_OPEN 1
66#endif
67
68
69#ifndef DEFAULT_TMPDIR
70# define DEFAULT_TMPDIR "/tmp"
71#endif
72
73/* The number of bytes per atomic read. */
74#define INITIAL_READSIZE 8192
75
76/* The number of bytes per atomic write. */
77#define WRITESIZE 8192
78
79/* The string that separates the records of the file. */
80static char const *separator;
81
82/* True if we have ever read standard input.  */
83static bool have_read_stdin = false;
84
85/* If true, print `separator' along with the record preceding it
86   in the file; otherwise with the record following it. */
87static bool separator_ends_record;
88
89/* 0 if `separator' is to be matched as a regular expression;
90   otherwise, the length of `separator', used as a sentinel to
91   stop the search. */
92static size_t sentinel_length;
93
94/* The length of a match with `separator'.  If `sentinel_length' is 0,
95   `match_length' is computed every time a match succeeds;
96   otherwise, it is simply the length of `separator'. */
97static size_t match_length;
98
99/* The input buffer. */
100static char *G_buffer;
101
102/* The number of bytes to read at once into `buffer'. */
103static size_t read_size;
104
105/* The size of `buffer'.  This is read_size * 2 + sentinel_length + 2.
106   The extra 2 bytes allow `past_end' to have a value beyond the
107   end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
108static size_t G_buffer_size;
109
110/* The compiled regular expression representing `separator'. */
111static struct re_pattern_buffer compiled_separator;
112static char compiled_separator_fastmap[UCHAR_MAX + 1];
113static struct re_registers regs;
114
115static struct option const longopts[] =
116{
117  {"before", no_argument, NULL, 'b'},
118  {"regex", no_argument, NULL, 'r'},
119  {"separator", required_argument, NULL, 's'},
120  {GETOPT_HELP_OPTION_DECL},
121  {GETOPT_VERSION_OPTION_DECL},
122  {NULL, 0, NULL, 0}
123};
124
125void
126usage (int status)
127{
128  if (status != EXIT_SUCCESS)
129    fprintf (stderr, _("Try `%s --help' for more information.\n"),
130             program_name);
131  else
132    {
133      printf (_("\
134Usage: %s [OPTION]... [FILE]...\n\
135"),
136              program_name);
137      fputs (_("\
138Write each FILE to standard output, last line first.\n\
139With no FILE, or when FILE is -, read standard input.\n\
140\n\
141"), stdout);
142      fputs (_("\
143Mandatory arguments to long options are mandatory for short options too.\n\
144"), stdout);
145      fputs (_("\
146  -b, --before             attach the separator before instead of after\n\
147  -r, --regex              interpret the separator as a regular expression\n\
148  -s, --separator=STRING   use STRING as the separator instead of newline\n\
149"), stdout);
150      fputs (HELP_OPTION_DESCRIPTION, stdout);
151      fputs (VERSION_OPTION_DESCRIPTION, stdout);
152      emit_ancillary_info ();
153    }
154  exit (status);
155}
156
157/* Print the characters from START to PAST_END - 1.
158   If START is NULL, just flush the buffer. */
159
160static void
161output (const char *start, const char *past_end)
162{
163  static char buffer[WRITESIZE];
164  static size_t bytes_in_buffer = 0;
165  size_t bytes_to_add = past_end - start;
166  size_t bytes_available = WRITESIZE - bytes_in_buffer;
167
168  if (start == 0)
169    {
170      fwrite (buffer, 1, bytes_in_buffer, stdout);
171      bytes_in_buffer = 0;
172      return;
173    }
174
175  /* Write out as many full buffers as possible. */
176  while (bytes_to_add >= bytes_available)
177    {
178      memcpy (buffer + bytes_in_buffer, start, bytes_available);
179      bytes_to_add -= bytes_available;
180      start += bytes_available;
181      fwrite (buffer, 1, WRITESIZE, stdout);
182      bytes_in_buffer = 0;
183      bytes_available = WRITESIZE;
184    }
185
186  memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
187  bytes_in_buffer += bytes_to_add;
188}
189
190/* Print in reverse the file open on descriptor FD for reading FILE.
191   Return true if successful.  */
192
193static bool
194tac_seekable (int input_fd, const char *file)
195{
196  /* Pointer to the location in `G_buffer' where the search for
197     the next separator will begin. */
198  char *match_start;
199
200  /* Pointer to one past the rightmost character in `G_buffer' that
201     has not been printed yet. */
202  char *past_end;
203
204  /* Length of the record growing in `G_buffer'. */
205  size_t saved_record_size;
206
207  /* Offset in the file of the next read. */
208  off_t file_pos;
209
210  /* True if `output' has not been called yet for any file.
211     Only used when the separator is attached to the preceding record. */
212  bool first_time = true;
213  char first_char = *separator;	/* Speed optimization, non-regexp. */
214  char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
215  size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
216
217  /* Find the size of the input file. */
218  file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
219  if (file_pos < 1)
220    return true;			/* It's an empty file. */
221
222  /* Arrange for the first read to lop off enough to leave the rest of the
223     file a multiple of `read_size'.  Since `read_size' can change, this may
224     not always hold during the program run, but since it usually will, leave
225     it here for i/o efficiency (page/sector boundaries and all that).
226     Note: the efficiency gain has not been verified. */
227  saved_record_size = file_pos % read_size;
228  if (saved_record_size == 0)
229    saved_record_size = read_size;
230  file_pos -= saved_record_size;
231  /* `file_pos' now points to the start of the last (probably partial) block
232     in the input file. */
233
234  if (lseek (input_fd, file_pos, SEEK_SET) < 0)
235    error (0, errno, _("%s: seek failed"), quotearg_colon (file));
236
237  if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
238    {
239      error (0, errno, _("%s: read error"), quotearg_colon (file));
240      return false;
241    }
242
243  match_start = past_end = G_buffer + saved_record_size;
244  /* For non-regexp search, move past impossible positions for a match. */
245  if (sentinel_length)
246    match_start -= match_length1;
247
248  for (;;)
249    {
250      /* Search backward from `match_start' - 1 to `G_buffer' for a match
251         with `separator'; for speed, use strncmp if `separator' contains no
252         metacharacters.
253         If the match succeeds, set `match_start' to point to the start of
254         the match and `match_length' to the length of the match.
255         Otherwise, make `match_start' < `G_buffer'. */
256      if (sentinel_length == 0)
257        {
258          size_t i = match_start - G_buffer;
259          regoff_t ri = i;
260          regoff_t range = 1 - ri;
261          regoff_t ret;
262
263          if (1 < range)
264            error (EXIT_FAILURE, 0, _("record too large"));
265
266          if (range == 1
267              || ((ret = re_search (&compiled_separator, G_buffer,
268                                    i, i - 1, range, &regs))
269                  == -1))
270            match_start = G_buffer - 1;
271          else if (ret == -2)
272            {
273              error (EXIT_FAILURE, 0,
274                     _("error in regular expression search"));
275            }
276          else
277            {
278              match_start = G_buffer + regs.start[0];
279              match_length = regs.end[0] - regs.start[0];
280            }
281        }
282      else
283        {
284          /* `match_length' is constant for non-regexp boundaries. */
285          while (*--match_start != first_char
286                 || (match_length1 && strncmp (match_start + 1, separator1,
287                                               match_length1)))
288            /* Do nothing. */ ;
289        }
290
291      /* Check whether we backed off the front of `G_buffer' without finding
292         a match for `separator'. */
293      if (match_start < G_buffer)
294        {
295          if (file_pos == 0)
296            {
297              /* Hit the beginning of the file; print the remaining record. */
298              output (G_buffer, past_end);
299              return true;
300            }
301
302          saved_record_size = past_end - G_buffer;
303          if (saved_record_size > read_size)
304            {
305              /* `G_buffer_size' is about twice `read_size', so since
306                 we want to read in another `read_size' bytes before
307                 the data already in `G_buffer', we need to increase
308                 `G_buffer_size'. */
309              char *newbuffer;
310              size_t offset = sentinel_length ? sentinel_length : 1;
311              ptrdiff_t match_start_offset = match_start - G_buffer;
312              ptrdiff_t past_end_offset = past_end - G_buffer;
313              size_t old_G_buffer_size = G_buffer_size;
314
315              read_size *= 2;
316              G_buffer_size = read_size * 2 + sentinel_length + 2;
317              if (G_buffer_size < old_G_buffer_size)
318                xalloc_die ();
319              newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
320              newbuffer += offset;
321              /* Adjust the pointers for the new buffer location.  */
322              match_start = newbuffer + match_start_offset;
323              past_end = newbuffer + past_end_offset;
324              G_buffer = newbuffer;
325            }
326
327          /* Back up to the start of the next bufferfull of the file.  */
328          if (file_pos >= read_size)
329            file_pos -= read_size;
330          else
331            {
332              read_size = file_pos;
333              file_pos = 0;
334            }
335          if (lseek (input_fd, file_pos, SEEK_SET) < 0)
336            error (0, errno, _("%s: seek failed"), quotearg_colon (file));
337
338          /* Shift the pending record data right to make room for the new.
339             The source and destination regions probably overlap.  */
340          memmove (G_buffer + read_size, G_buffer, saved_record_size);
341          past_end = G_buffer + read_size + saved_record_size;
342          /* For non-regexp searches, avoid unneccessary scanning. */
343          if (sentinel_length)
344            match_start = G_buffer + read_size;
345          else
346            match_start = past_end;
347
348          if (safe_read (input_fd, G_buffer, read_size) != read_size)
349            {
350              error (0, errno, _("%s: read error"), quotearg_colon (file));
351              return false;
352            }
353        }
354      else
355        {
356          /* Found a match of `separator'. */
357          if (separator_ends_record)
358            {
359              char *match_end = match_start + match_length;
360
361              /* If this match of `separator' isn't at the end of the
362                 file, print the record. */
363              if (!first_time || match_end != past_end)
364                output (match_end, past_end);
365              past_end = match_end;
366              first_time = false;
367            }
368          else
369            {
370              output (match_start, past_end);
371              past_end = match_start;
372            }
373
374          /* For non-regex matching, we can back up.  */
375          if (sentinel_length > 0)
376            match_start -= match_length - 1;
377        }
378    }
379}
380
381#if DONT_UNLINK_WHILE_OPEN
382
383/* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
384   Using atexit like this is wrong, since it can fail
385   when called e.g. 32 or more times.
386   But this isn't a big deal, since the code is used only on WOE/DOS
387   systems, and few people invoke tac on that many nonseekable files.  */
388
389static const char *file_to_remove;
390static FILE *fp_to_close;
391
392static void
393unlink_tempfile (void)
394{
395  fclose (fp_to_close);
396  unlink (file_to_remove);
397}
398
399static void
400record_or_unlink_tempfile (char const *fn, FILE *fp)
401{
402  if (!file_to_remove)
403    {
404      file_to_remove = fn;
405      fp_to_close = fp;
406      atexit (unlink_tempfile);
407    }
408}
409
410#else
411
412static void
413record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED)
414{
415  unlink (fn);
416}
417
418#endif
419
420/* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
421   a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
422   and file name.  Return true if successful.  */
423
424static bool
425copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
426{
427  static char *template = NULL;
428  static char const *tempdir;
429  char *tempfile;
430  FILE *tmp;
431  int fd;
432
433  if (template == NULL)
434    {
435      char const * const Template = "%s/tacXXXXXX";
436      tempdir = getenv ("TMPDIR");
437      if (tempdir == NULL)
438        tempdir = DEFAULT_TMPDIR;
439
440      /* Subtract 2 for `%s' and add 1 for the trailing NUL byte.  */
441      template = xmalloc (strlen (tempdir) + strlen (Template) - 2 + 1);
442      sprintf (template, Template, tempdir);
443    }
444
445  /* FIXME: there's a small window between a successful mkstemp call
446     and the unlink that's performed by record_or_unlink_tempfile.
447     If we're interrupted in that interval, this code fails to remove
448     the temporary file.  On systems that define DONT_UNLINK_WHILE_OPEN,
449     the window is much larger -- it extends to the atexit-called
450     unlink_tempfile.
451     FIXME: clean up upon fatal signal.  Don't block them, in case
452     $TMPFILE is a remote file system.  */
453
454  tempfile = template;
455  fd = mkstemp (template);
456  if (fd < 0)
457    {
458      error (0, errno, _("cannot create temporary file in %s"),
459             quote (tempdir));
460      return false;
461    }
462
463  tmp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
464  if (! tmp)
465    {
466      error (0, errno, _("cannot open %s for writing"), quote (tempfile));
467      close (fd);
468      unlink (tempfile);
469      return false;
470    }
471
472  record_or_unlink_tempfile (tempfile, tmp);
473
474  while (1)
475    {
476      size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
477      if (bytes_read == 0)
478        break;
479      if (bytes_read == SAFE_READ_ERROR)
480        {
481          error (0, errno, _("%s: read error"), quotearg_colon (file));
482          goto Fail;
483        }
484
485      if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
486        {
487          error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
488          goto Fail;
489        }
490    }
491
492  if (fflush (tmp) != 0)
493    {
494      error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
495      goto Fail;
496    }
497
498  *g_tmp = tmp;
499  *g_tempfile = tempfile;
500  return true;
501
502 Fail:
503  fclose (tmp);
504  return false;
505}
506
507/* Copy INPUT_FD to a temporary, then tac that file.
508   Return true if successful.  */
509
510static bool
511tac_nonseekable (int input_fd, const char *file)
512{
513  FILE *tmp_stream;
514  char *tmp_file;
515  return (copy_to_temp (&tmp_stream, &tmp_file, input_fd, file)
516          && tac_seekable (fileno (tmp_stream), tmp_file));
517}
518
519/* Print FILE in reverse, copying it to a temporary
520   file first if it is not seekable.
521   Return true if successful.  */
522
523static bool
524tac_file (const char *filename)
525{
526  bool ok;
527  off_t file_size;
528  int fd;
529  bool is_stdin = STREQ (filename, "-");
530
531  if (is_stdin)
532    {
533      have_read_stdin = true;
534      fd = STDIN_FILENO;
535      filename = _("standard input");
536      if (O_BINARY && ! isatty (STDIN_FILENO))
537        xfreopen (NULL, "rb", stdin);
538    }
539  else
540    {
541      fd = open (filename, O_RDONLY | O_BINARY);
542      if (fd < 0)
543        {
544          error (0, errno, _("cannot open %s for reading"), quote (filename));
545          return false;
546        }
547    }
548
549  file_size = lseek (fd, (off_t) 0, SEEK_END);
550
551  ok = (file_size < 0 || isatty (fd)
552        ? tac_nonseekable (fd, filename)
553        : tac_seekable (fd, filename));
554
555  if (!is_stdin && close (fd) != 0)
556    {
557      error (0, errno, _("%s: read error"), quotearg_colon (filename));
558      ok = false;
559    }
560  return ok;
561}
562
563int
564main (int argc, char **argv)
565{
566  const char *error_message;	/* Return value from re_compile_pattern. */
567  int optc;
568  bool ok;
569  size_t half_buffer_size;
570
571  /* Initializer for file_list if no file-arguments
572     were specified on the command line.  */
573  static char const *const default_file_list[] = {"-", NULL};
574  char const *const *file;
575
576  initialize_main (&argc, &argv);
577  set_program_name (argv[0]);
578  setlocale (LC_ALL, "");
579  bindtextdomain (PACKAGE, LOCALEDIR);
580  textdomain (PACKAGE);
581
582  atexit (close_stdout);
583
584  separator = "\n";
585  sentinel_length = 1;
586  separator_ends_record = true;
587
588  while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
589    {
590      switch (optc)
591        {
592        case 'b':
593          separator_ends_record = false;
594          break;
595        case 'r':
596          sentinel_length = 0;
597          break;
598        case 's':
599          separator = optarg;
600          if (*separator == 0)
601            error (EXIT_FAILURE, 0, _("separator cannot be empty"));
602          break;
603        case_GETOPT_HELP_CHAR;
604        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
605        default:
606          usage (EXIT_FAILURE);
607        }
608    }
609
610  if (sentinel_length == 0)
611    {
612      compiled_separator.buffer = NULL;
613      compiled_separator.allocated = 0;
614      compiled_separator.fastmap = compiled_separator_fastmap;
615      compiled_separator.translate = NULL;
616      error_message = re_compile_pattern (separator, strlen (separator),
617                                          &compiled_separator);
618      if (error_message)
619        error (EXIT_FAILURE, 0, "%s", error_message);
620    }
621  else
622    match_length = sentinel_length = strlen (separator);
623
624  read_size = INITIAL_READSIZE;
625  while (sentinel_length >= read_size / 2)
626    {
627      if (SIZE_MAX / 2 < read_size)
628        xalloc_die ();
629      read_size *= 2;
630    }
631  half_buffer_size = read_size + sentinel_length + 1;
632  G_buffer_size = 2 * half_buffer_size;
633  if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
634    xalloc_die ();
635  G_buffer = xmalloc (G_buffer_size);
636  {
637  void *buf = G_buffer;
638  if (sentinel_length)
639    {
640      strcpy (G_buffer, separator);
641      G_buffer += sentinel_length;
642    }
643  else
644    {
645      ++G_buffer;
646    }
647
648  file = (optind < argc
649          ? (char const *const *) &argv[optind]
650          : default_file_list);
651
652  if (O_BINARY && ! isatty (STDOUT_FILENO))
653    xfreopen (NULL, "wb", stdout);
654
655  {
656    size_t i;
657    ok = true;
658    for (i = 0; file[i]; ++i)
659      ok &= tac_file (file[i]);
660  }
661
662  /* Flush the output buffer. */
663  output ((char *) NULL, (char *) NULL);
664
665  if (have_read_stdin && close (STDIN_FILENO) < 0)
666    {
667      error (0, errno, "-");
668      ok = false;
669    }
670  free (buf);
671  }
672  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
673}
674