1/* tac - concatenate and print files in reverse 2 Copyright (C) 1988-1991, 1995-2006, 2008-2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Jay Lepreau (lepreau@cs.utah.edu). 18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */ 19 20/* Copy each FILE, or the standard input if none are given or when a 21 FILE name of "-" is encountered, to the standard output with the 22 order of the records reversed. The records are separated by 23 instances of a string, or a newline if none is given. By default, the 24 separator string is attached to the end of the record that it 25 follows in the file. 26 27 Options: 28 -b, --before The separator is attached to the beginning 29 of the record that it precedes in the file. 30 -r, --regex The separator is a regular expression. 31 -s, --separator=separator Use SEPARATOR as the record separator. 32 33 To reverse a file byte by byte, use (in bash, ksh, or sh): 34tac -r -s '.\| 35' file */ 36 37#include <config.h> 38 39#include <stdio.h> 40#include <getopt.h> 41#include <sys/types.h> 42#include "system.h" 43 44#include <regex.h> 45 46#include "error.h" 47#include "quote.h" 48#include "quotearg.h" 49#include "safe-read.h" 50#include "stdlib--.h" 51#include "xfreopen.h" 52 53/* The official name of this program (e.g., no `g' prefix). */ 54#define PROGRAM_NAME "tac" 55 56#define AUTHORS \ 57 proper_name ("Jay Lepreau"), \ 58 proper_name ("David MacKenzie") 59 60#if defined __MSDOS__ || defined _WIN32 61/* Define this to non-zero on systems for which the regular mechanism 62 (of unlinking an open file and expecting to be able to write, seek 63 back to the beginning, then reread it) doesn't work. E.g., on Windows 64 and DOS systems. */ 65# define DONT_UNLINK_WHILE_OPEN 1 66#endif 67 68 69#ifndef DEFAULT_TMPDIR 70# define DEFAULT_TMPDIR "/tmp" 71#endif 72 73/* The number of bytes per atomic read. */ 74#define INITIAL_READSIZE 8192 75 76/* The number of bytes per atomic write. */ 77#define WRITESIZE 8192 78 79/* The string that separates the records of the file. */ 80static char const *separator; 81 82/* True if we have ever read standard input. */ 83static bool have_read_stdin = false; 84 85/* If true, print `separator' along with the record preceding it 86 in the file; otherwise with the record following it. */ 87static bool separator_ends_record; 88 89/* 0 if `separator' is to be matched as a regular expression; 90 otherwise, the length of `separator', used as a sentinel to 91 stop the search. */ 92static size_t sentinel_length; 93 94/* The length of a match with `separator'. If `sentinel_length' is 0, 95 `match_length' is computed every time a match succeeds; 96 otherwise, it is simply the length of `separator'. */ 97static size_t match_length; 98 99/* The input buffer. */ 100static char *G_buffer; 101 102/* The number of bytes to read at once into `buffer'. */ 103static size_t read_size; 104 105/* The size of `buffer'. This is read_size * 2 + sentinel_length + 2. 106 The extra 2 bytes allow `past_end' to have a value beyond the 107 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */ 108static size_t G_buffer_size; 109 110/* The compiled regular expression representing `separator'. */ 111static struct re_pattern_buffer compiled_separator; 112static char compiled_separator_fastmap[UCHAR_MAX + 1]; 113static struct re_registers regs; 114 115static struct option const longopts[] = 116{ 117 {"before", no_argument, NULL, 'b'}, 118 {"regex", no_argument, NULL, 'r'}, 119 {"separator", required_argument, NULL, 's'}, 120 {GETOPT_HELP_OPTION_DECL}, 121 {GETOPT_VERSION_OPTION_DECL}, 122 {NULL, 0, NULL, 0} 123}; 124 125void 126usage (int status) 127{ 128 if (status != EXIT_SUCCESS) 129 fprintf (stderr, _("Try `%s --help' for more information.\n"), 130 program_name); 131 else 132 { 133 printf (_("\ 134Usage: %s [OPTION]... [FILE]...\n\ 135"), 136 program_name); 137 fputs (_("\ 138Write each FILE to standard output, last line first.\n\ 139With no FILE, or when FILE is -, read standard input.\n\ 140\n\ 141"), stdout); 142 fputs (_("\ 143Mandatory arguments to long options are mandatory for short options too.\n\ 144"), stdout); 145 fputs (_("\ 146 -b, --before attach the separator before instead of after\n\ 147 -r, --regex interpret the separator as a regular expression\n\ 148 -s, --separator=STRING use STRING as the separator instead of newline\n\ 149"), stdout); 150 fputs (HELP_OPTION_DESCRIPTION, stdout); 151 fputs (VERSION_OPTION_DESCRIPTION, stdout); 152 emit_ancillary_info (); 153 } 154 exit (status); 155} 156 157/* Print the characters from START to PAST_END - 1. 158 If START is NULL, just flush the buffer. */ 159 160static void 161output (const char *start, const char *past_end) 162{ 163 static char buffer[WRITESIZE]; 164 static size_t bytes_in_buffer = 0; 165 size_t bytes_to_add = past_end - start; 166 size_t bytes_available = WRITESIZE - bytes_in_buffer; 167 168 if (start == 0) 169 { 170 fwrite (buffer, 1, bytes_in_buffer, stdout); 171 bytes_in_buffer = 0; 172 return; 173 } 174 175 /* Write out as many full buffers as possible. */ 176 while (bytes_to_add >= bytes_available) 177 { 178 memcpy (buffer + bytes_in_buffer, start, bytes_available); 179 bytes_to_add -= bytes_available; 180 start += bytes_available; 181 fwrite (buffer, 1, WRITESIZE, stdout); 182 bytes_in_buffer = 0; 183 bytes_available = WRITESIZE; 184 } 185 186 memcpy (buffer + bytes_in_buffer, start, bytes_to_add); 187 bytes_in_buffer += bytes_to_add; 188} 189 190/* Print in reverse the file open on descriptor FD for reading FILE. 191 Return true if successful. */ 192 193static bool 194tac_seekable (int input_fd, const char *file) 195{ 196 /* Pointer to the location in `G_buffer' where the search for 197 the next separator will begin. */ 198 char *match_start; 199 200 /* Pointer to one past the rightmost character in `G_buffer' that 201 has not been printed yet. */ 202 char *past_end; 203 204 /* Length of the record growing in `G_buffer'. */ 205 size_t saved_record_size; 206 207 /* Offset in the file of the next read. */ 208 off_t file_pos; 209 210 /* True if `output' has not been called yet for any file. 211 Only used when the separator is attached to the preceding record. */ 212 bool first_time = true; 213 char first_char = *separator; /* Speed optimization, non-regexp. */ 214 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */ 215 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */ 216 217 /* Find the size of the input file. */ 218 file_pos = lseek (input_fd, (off_t) 0, SEEK_END); 219 if (file_pos < 1) 220 return true; /* It's an empty file. */ 221 222 /* Arrange for the first read to lop off enough to leave the rest of the 223 file a multiple of `read_size'. Since `read_size' can change, this may 224 not always hold during the program run, but since it usually will, leave 225 it here for i/o efficiency (page/sector boundaries and all that). 226 Note: the efficiency gain has not been verified. */ 227 saved_record_size = file_pos % read_size; 228 if (saved_record_size == 0) 229 saved_record_size = read_size; 230 file_pos -= saved_record_size; 231 /* `file_pos' now points to the start of the last (probably partial) block 232 in the input file. */ 233 234 if (lseek (input_fd, file_pos, SEEK_SET) < 0) 235 error (0, errno, _("%s: seek failed"), quotearg_colon (file)); 236 237 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size) 238 { 239 error (0, errno, _("%s: read error"), quotearg_colon (file)); 240 return false; 241 } 242 243 match_start = past_end = G_buffer + saved_record_size; 244 /* For non-regexp search, move past impossible positions for a match. */ 245 if (sentinel_length) 246 match_start -= match_length1; 247 248 for (;;) 249 { 250 /* Search backward from `match_start' - 1 to `G_buffer' for a match 251 with `separator'; for speed, use strncmp if `separator' contains no 252 metacharacters. 253 If the match succeeds, set `match_start' to point to the start of 254 the match and `match_length' to the length of the match. 255 Otherwise, make `match_start' < `G_buffer'. */ 256 if (sentinel_length == 0) 257 { 258 size_t i = match_start - G_buffer; 259 regoff_t ri = i; 260 regoff_t range = 1 - ri; 261 regoff_t ret; 262 263 if (1 < range) 264 error (EXIT_FAILURE, 0, _("record too large")); 265 266 if (range == 1 267 || ((ret = re_search (&compiled_separator, G_buffer, 268 i, i - 1, range, ®s)) 269 == -1)) 270 match_start = G_buffer - 1; 271 else if (ret == -2) 272 { 273 error (EXIT_FAILURE, 0, 274 _("error in regular expression search")); 275 } 276 else 277 { 278 match_start = G_buffer + regs.start[0]; 279 match_length = regs.end[0] - regs.start[0]; 280 } 281 } 282 else 283 { 284 /* `match_length' is constant for non-regexp boundaries. */ 285 while (*--match_start != first_char 286 || (match_length1 && strncmp (match_start + 1, separator1, 287 match_length1))) 288 /* Do nothing. */ ; 289 } 290 291 /* Check whether we backed off the front of `G_buffer' without finding 292 a match for `separator'. */ 293 if (match_start < G_buffer) 294 { 295 if (file_pos == 0) 296 { 297 /* Hit the beginning of the file; print the remaining record. */ 298 output (G_buffer, past_end); 299 return true; 300 } 301 302 saved_record_size = past_end - G_buffer; 303 if (saved_record_size > read_size) 304 { 305 /* `G_buffer_size' is about twice `read_size', so since 306 we want to read in another `read_size' bytes before 307 the data already in `G_buffer', we need to increase 308 `G_buffer_size'. */ 309 char *newbuffer; 310 size_t offset = sentinel_length ? sentinel_length : 1; 311 ptrdiff_t match_start_offset = match_start - G_buffer; 312 ptrdiff_t past_end_offset = past_end - G_buffer; 313 size_t old_G_buffer_size = G_buffer_size; 314 315 read_size *= 2; 316 G_buffer_size = read_size * 2 + sentinel_length + 2; 317 if (G_buffer_size < old_G_buffer_size) 318 xalloc_die (); 319 newbuffer = xrealloc (G_buffer - offset, G_buffer_size); 320 newbuffer += offset; 321 /* Adjust the pointers for the new buffer location. */ 322 match_start = newbuffer + match_start_offset; 323 past_end = newbuffer + past_end_offset; 324 G_buffer = newbuffer; 325 } 326 327 /* Back up to the start of the next bufferfull of the file. */ 328 if (file_pos >= read_size) 329 file_pos -= read_size; 330 else 331 { 332 read_size = file_pos; 333 file_pos = 0; 334 } 335 if (lseek (input_fd, file_pos, SEEK_SET) < 0) 336 error (0, errno, _("%s: seek failed"), quotearg_colon (file)); 337 338 /* Shift the pending record data right to make room for the new. 339 The source and destination regions probably overlap. */ 340 memmove (G_buffer + read_size, G_buffer, saved_record_size); 341 past_end = G_buffer + read_size + saved_record_size; 342 /* For non-regexp searches, avoid unneccessary scanning. */ 343 if (sentinel_length) 344 match_start = G_buffer + read_size; 345 else 346 match_start = past_end; 347 348 if (safe_read (input_fd, G_buffer, read_size) != read_size) 349 { 350 error (0, errno, _("%s: read error"), quotearg_colon (file)); 351 return false; 352 } 353 } 354 else 355 { 356 /* Found a match of `separator'. */ 357 if (separator_ends_record) 358 { 359 char *match_end = match_start + match_length; 360 361 /* If this match of `separator' isn't at the end of the 362 file, print the record. */ 363 if (!first_time || match_end != past_end) 364 output (match_end, past_end); 365 past_end = match_end; 366 first_time = false; 367 } 368 else 369 { 370 output (match_start, past_end); 371 past_end = match_start; 372 } 373 374 /* For non-regex matching, we can back up. */ 375 if (sentinel_length > 0) 376 match_start -= match_length - 1; 377 } 378 } 379} 380 381#if DONT_UNLINK_WHILE_OPEN 382 383/* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk. 384 Using atexit like this is wrong, since it can fail 385 when called e.g. 32 or more times. 386 But this isn't a big deal, since the code is used only on WOE/DOS 387 systems, and few people invoke tac on that many nonseekable files. */ 388 389static const char *file_to_remove; 390static FILE *fp_to_close; 391 392static void 393unlink_tempfile (void) 394{ 395 fclose (fp_to_close); 396 unlink (file_to_remove); 397} 398 399static void 400record_or_unlink_tempfile (char const *fn, FILE *fp) 401{ 402 if (!file_to_remove) 403 { 404 file_to_remove = fn; 405 fp_to_close = fp; 406 atexit (unlink_tempfile); 407 } 408} 409 410#else 411 412static void 413record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED) 414{ 415 unlink (fn); 416} 417 418#endif 419 420/* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to 421 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream 422 and file name. Return true if successful. */ 423 424static bool 425copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) 426{ 427 static char *template = NULL; 428 static char const *tempdir; 429 char *tempfile; 430 FILE *tmp; 431 int fd; 432 433 if (template == NULL) 434 { 435 char const * const Template = "%s/tacXXXXXX"; 436 tempdir = getenv ("TMPDIR"); 437 if (tempdir == NULL) 438 tempdir = DEFAULT_TMPDIR; 439 440 /* Subtract 2 for `%s' and add 1 for the trailing NUL byte. */ 441 template = xmalloc (strlen (tempdir) + strlen (Template) - 2 + 1); 442 sprintf (template, Template, tempdir); 443 } 444 445 /* FIXME: there's a small window between a successful mkstemp call 446 and the unlink that's performed by record_or_unlink_tempfile. 447 If we're interrupted in that interval, this code fails to remove 448 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN, 449 the window is much larger -- it extends to the atexit-called 450 unlink_tempfile. 451 FIXME: clean up upon fatal signal. Don't block them, in case 452 $TMPFILE is a remote file system. */ 453 454 tempfile = template; 455 fd = mkstemp (template); 456 if (fd < 0) 457 { 458 error (0, errno, _("cannot create temporary file in %s"), 459 quote (tempdir)); 460 return false; 461 } 462 463 tmp = fdopen (fd, (O_BINARY ? "w+b" : "w+")); 464 if (! tmp) 465 { 466 error (0, errno, _("cannot open %s for writing"), quote (tempfile)); 467 close (fd); 468 unlink (tempfile); 469 return false; 470 } 471 472 record_or_unlink_tempfile (tempfile, tmp); 473 474 while (1) 475 { 476 size_t bytes_read = safe_read (input_fd, G_buffer, read_size); 477 if (bytes_read == 0) 478 break; 479 if (bytes_read == SAFE_READ_ERROR) 480 { 481 error (0, errno, _("%s: read error"), quotearg_colon (file)); 482 goto Fail; 483 } 484 485 if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read) 486 { 487 error (0, errno, _("%s: write error"), quotearg_colon (tempfile)); 488 goto Fail; 489 } 490 } 491 492 if (fflush (tmp) != 0) 493 { 494 error (0, errno, _("%s: write error"), quotearg_colon (tempfile)); 495 goto Fail; 496 } 497 498 *g_tmp = tmp; 499 *g_tempfile = tempfile; 500 return true; 501 502 Fail: 503 fclose (tmp); 504 return false; 505} 506 507/* Copy INPUT_FD to a temporary, then tac that file. 508 Return true if successful. */ 509 510static bool 511tac_nonseekable (int input_fd, const char *file) 512{ 513 FILE *tmp_stream; 514 char *tmp_file; 515 return (copy_to_temp (&tmp_stream, &tmp_file, input_fd, file) 516 && tac_seekable (fileno (tmp_stream), tmp_file)); 517} 518 519/* Print FILE in reverse, copying it to a temporary 520 file first if it is not seekable. 521 Return true if successful. */ 522 523static bool 524tac_file (const char *filename) 525{ 526 bool ok; 527 off_t file_size; 528 int fd; 529 bool is_stdin = STREQ (filename, "-"); 530 531 if (is_stdin) 532 { 533 have_read_stdin = true; 534 fd = STDIN_FILENO; 535 filename = _("standard input"); 536 if (O_BINARY && ! isatty (STDIN_FILENO)) 537 xfreopen (NULL, "rb", stdin); 538 } 539 else 540 { 541 fd = open (filename, O_RDONLY | O_BINARY); 542 if (fd < 0) 543 { 544 error (0, errno, _("cannot open %s for reading"), quote (filename)); 545 return false; 546 } 547 } 548 549 file_size = lseek (fd, (off_t) 0, SEEK_END); 550 551 ok = (file_size < 0 || isatty (fd) 552 ? tac_nonseekable (fd, filename) 553 : tac_seekable (fd, filename)); 554 555 if (!is_stdin && close (fd) != 0) 556 { 557 error (0, errno, _("%s: read error"), quotearg_colon (filename)); 558 ok = false; 559 } 560 return ok; 561} 562 563int 564main (int argc, char **argv) 565{ 566 const char *error_message; /* Return value from re_compile_pattern. */ 567 int optc; 568 bool ok; 569 size_t half_buffer_size; 570 571 /* Initializer for file_list if no file-arguments 572 were specified on the command line. */ 573 static char const *const default_file_list[] = {"-", NULL}; 574 char const *const *file; 575 576 initialize_main (&argc, &argv); 577 set_program_name (argv[0]); 578 setlocale (LC_ALL, ""); 579 bindtextdomain (PACKAGE, LOCALEDIR); 580 textdomain (PACKAGE); 581 582 atexit (close_stdout); 583 584 separator = "\n"; 585 sentinel_length = 1; 586 separator_ends_record = true; 587 588 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1) 589 { 590 switch (optc) 591 { 592 case 'b': 593 separator_ends_record = false; 594 break; 595 case 'r': 596 sentinel_length = 0; 597 break; 598 case 's': 599 separator = optarg; 600 if (*separator == 0) 601 error (EXIT_FAILURE, 0, _("separator cannot be empty")); 602 break; 603 case_GETOPT_HELP_CHAR; 604 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); 605 default: 606 usage (EXIT_FAILURE); 607 } 608 } 609 610 if (sentinel_length == 0) 611 { 612 compiled_separator.buffer = NULL; 613 compiled_separator.allocated = 0; 614 compiled_separator.fastmap = compiled_separator_fastmap; 615 compiled_separator.translate = NULL; 616 error_message = re_compile_pattern (separator, strlen (separator), 617 &compiled_separator); 618 if (error_message) 619 error (EXIT_FAILURE, 0, "%s", error_message); 620 } 621 else 622 match_length = sentinel_length = strlen (separator); 623 624 read_size = INITIAL_READSIZE; 625 while (sentinel_length >= read_size / 2) 626 { 627 if (SIZE_MAX / 2 < read_size) 628 xalloc_die (); 629 read_size *= 2; 630 } 631 half_buffer_size = read_size + sentinel_length + 1; 632 G_buffer_size = 2 * half_buffer_size; 633 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size)) 634 xalloc_die (); 635 G_buffer = xmalloc (G_buffer_size); 636 { 637 void *buf = G_buffer; 638 if (sentinel_length) 639 { 640 strcpy (G_buffer, separator); 641 G_buffer += sentinel_length; 642 } 643 else 644 { 645 ++G_buffer; 646 } 647 648 file = (optind < argc 649 ? (char const *const *) &argv[optind] 650 : default_file_list); 651 652 if (O_BINARY && ! isatty (STDOUT_FILENO)) 653 xfreopen (NULL, "wb", stdout); 654 655 { 656 size_t i; 657 ok = true; 658 for (i = 0; file[i]; ++i) 659 ok &= tac_file (file[i]); 660 } 661 662 /* Flush the output buffer. */ 663 output ((char *) NULL, (char *) NULL); 664 665 if (have_read_stdin && close (STDIN_FILENO) < 0) 666 { 667 error (0, errno, "-"); 668 ok = false; 669 } 670 free (buf); 671 } 672 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); 673} 674