/* cut - remove parts of lines of files
   Copyright (C) 1997-2010 Free Software Foundation, Inc.
   Copyright (C) 1984 David M. Ihnat

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */

/* Written by David Ihnat. */

/* POSIX changes, bug fixes, long-named options, and cleanup
   by David MacKenzie <djm@gnu.ai.mit.edu>.

   Rewrite cut_fields and cut_bytes -- Jim Meyering. */

#include <config.h>

#include <stdio.h>
#include <assert.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"

#include "error.h"
#include "getndelim2.h"
#include "hash.h"
#include "quote.h"
#include "xstrndup.h"

/* The official name of this program (e.g., no `g' prefix). */
#define PROGRAM_NAME "cut"

/* The author list handed to case_GETOPT_VERSION_CHAR in main. */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")

/* Report MESSAGE through error () with a zero status and a zero errno
   value, then call usage (EXIT_FAILURE), which does not return.
   NOTE(review): MESSAGE is passed in the format-string position of
   error (), so it must not contain `%' conversions -- confirm for
   translated messages. */
#define FATAL_ERROR(Message) \
  do \
    { \
      error (0, 0, (Message)); \
      usage (EXIT_FAILURE); \
    } \
  while (0)

/* Append LOW, HIGH to the list RP of range pairs, allocating additional
   space if necessary. Update the variable N_RP. When allocating,
   update the variable N_RP_ALLOCATED. Both variables must be in scope
   where the macro is used (set_fields declares them as locals).
*/ 58 59#define ADD_RANGE_PAIR(rp, low, high) \ 60 do \ 61 { \ 62 if (low == 0 || high == 0) \ 63 FATAL_ERROR (_("fields and positions are numbered from 1")); \ 64 if (n_rp >= n_rp_allocated) \ 65 { \ 66 (rp) = X2NREALLOC (rp, &n_rp_allocated); \ 67 } \ 68 rp[n_rp].lo = (low); \ 69 rp[n_rp].hi = (high); \ 70 ++n_rp; \ 71 } \ 72 while (0) 73 74struct range_pair 75 { 76 size_t lo; 77 size_t hi; 78 }; 79 80/* This buffer is used to support the semantics of the -s option 81 (or lack of same) when the specified field list includes (does 82 not include) the first field. In both of those cases, the entire 83 first field must be read into this buffer to determine whether it 84 is followed by a delimiter or a newline before any of it may be 85 output. Otherwise, cut_fields can do the job without using this 86 buffer. */ 87static char *field_1_buffer; 88 89/* The number of bytes allocated for FIELD_1_BUFFER. */ 90static size_t field_1_bufsize; 91 92/* The largest field or byte index used as an endpoint of a closed 93 or degenerate range specification; this doesn't include the starting 94 index of right-open-ended ranges. For example, with either range spec 95 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */ 96static size_t max_range_endpoint; 97 98/* If nonzero, this is the index of the first field in a range that goes 99 to end of line. */ 100static size_t eol_range_start; 101 102/* This is a bit vector. 103 In byte mode, which bytes to output. 104 In field mode, which DELIM-separated fields to output. 105 Both bytes and fields are numbered starting with 1, 106 so the zeroth bit of this array is unused. 107 A field or byte K has been selected if 108 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K)) 109 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */ 110static unsigned char *printable_field; 111 112enum operating_mode 113 { 114 undefined_mode, 115 116 /* Output characters that are in the given bytes. 
*/ 117 byte_mode, 118 119 /* Output the given delimeter-separated fields. */ 120 field_mode 121 }; 122 123static enum operating_mode operating_mode; 124 125/* If true do not output lines containing no delimeter characters. 126 Otherwise, all such lines are printed. This option is valid only 127 with field mode. */ 128static bool suppress_non_delimited; 129 130/* If nonzero, print all bytes, characters, or fields _except_ 131 those that were specified. */ 132static bool complement; 133 134/* The delimeter character for field mode. */ 135static unsigned char delim; 136 137/* True if the --output-delimiter=STRING option was specified. */ 138static bool output_delimiter_specified; 139 140/* The length of output_delimiter_string. */ 141static size_t output_delimiter_length; 142 143/* The output field separator string. Defaults to the 1-character 144 string consisting of the input delimiter. */ 145static char *output_delimiter_string; 146 147/* True if we have ever read standard input. */ 148static bool have_read_stdin; 149 150#define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31 151 152/* The set of range-start indices. For example, given a range-spec list like 153 `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15. 154 Note that although `4' looks like a range-start index, it is in the middle 155 of the `3-5' range, so it doesn't count. 156 This table is created/used IFF output_delimiter_specified is set. */ 157static Hash_table *range_start_ht; 158 159/* For long options that have no equivalent short option, use a 160 non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ 161enum 162{ 163 OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1, 164 COMPLEMENT_OPTION 165}; 166 167static struct option const longopts[] = 168{ 169 {"bytes", required_argument, NULL, 'b'}, 170 {"characters", required_argument, NULL, 'c'}, 171 {"fields", required_argument, NULL, 'f'}, 172 {"delimiter", required_argument, NULL, 'd'}, 173 {"only-delimited", no_argument, NULL, 's'}, 174 {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION}, 175 {"complement", no_argument, NULL, COMPLEMENT_OPTION}, 176 {GETOPT_HELP_OPTION_DECL}, 177 {GETOPT_VERSION_OPTION_DECL}, 178 {NULL, 0, NULL, 0} 179}; 180 181void 182usage (int status) 183{ 184 if (status != EXIT_SUCCESS) 185 fprintf (stderr, _("Try `%s --help' for more information.\n"), 186 program_name); 187 else 188 { 189 printf (_("\ 190Usage: %s OPTION... [FILE]...\n\ 191"), 192 program_name); 193 fputs (_("\ 194Print selected parts of lines from each FILE to standard output.\n\ 195\n\ 196"), stdout); 197 fputs (_("\ 198Mandatory arguments to long options are mandatory for short options too.\n\ 199"), stdout); 200 fputs (_("\ 201 -b, --bytes=LIST select only these bytes\n\ 202 -c, --characters=LIST select only these characters\n\ 203 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ 204"), stdout); 205 fputs (_("\ 206 -f, --fields=LIST select only these fields; also print any line\n\ 207 that contains no delimiter character, unless\n\ 208 the -s option is specified\n\ 209 -n (ignored)\n\ 210"), stdout); 211 fputs (_("\ 212 --complement complement the set of selected bytes, characters\n\ 213 or fields\n\ 214"), stdout); 215 fputs (_("\ 216 -s, --only-delimited do not print lines not containing delimiters\n\ 217 --output-delimiter=STRING use STRING as the output delimiter\n\ 218 the default is to use the input delimiter\n\ 219"), stdout); 220 fputs (HELP_OPTION_DESCRIPTION, stdout); 221 fputs (VERSION_OPTION_DESCRIPTION, stdout); 222 fputs (_("\ 223\n\ 224Use one, and only one of -b, -c or -f. 
Each LIST is made up of one\n\ 225range, or many ranges separated by commas. Selected input is written\n\ 226in the same order that it is read, and is written exactly once.\n\ 227"), stdout); 228 fputs (_("\ 229Each range is one of:\n\ 230\n\ 231 N N'th byte, character or field, counted from 1\n\ 232 N- from N'th byte, character or field, to end of line\n\ 233 N-M from N'th to M'th (included) byte, character or field\n\ 234 -M from first to M'th (included) byte, character or field\n\ 235\n\ 236With no FILE, or when FILE is -, read standard input.\n\ 237"), stdout); 238 emit_ancillary_info (); 239 } 240 exit (status); 241} 242 243static inline void 244mark_range_start (size_t i) 245{ 246 /* Record the fact that `i' is a range-start index. */ 247 void *ent_from_table = hash_insert (range_start_ht, (void*) i); 248 if (ent_from_table == NULL) 249 { 250 /* Insertion failed due to lack of memory. */ 251 xalloc_die (); 252 } 253 assert ((size_t) ent_from_table == i); 254} 255 256static inline void 257mark_printable_field (size_t i) 258{ 259 size_t n = i / CHAR_BIT; 260 printable_field[n] |= (1 << (i % CHAR_BIT)); 261} 262 263static inline bool 264is_printable_field (size_t i) 265{ 266 size_t n = i / CHAR_BIT; 267 return (printable_field[n] >> (i % CHAR_BIT)) & 1; 268} 269 270static size_t 271hash_int (const void *x, size_t tablesize) 272{ 273#ifdef UINTPTR_MAX 274 uintptr_t y = (uintptr_t) x; 275#else 276 size_t y = (size_t) x; 277#endif 278 return y % tablesize; 279} 280 281static bool 282hash_compare_ints (void const *x, void const *y) 283{ 284 return (x == y) ? true : false; 285} 286 287static bool 288is_range_start_index (size_t i) 289{ 290 return hash_lookup (range_start_ht, (void *) i) ? true : false; 291} 292 293/* Return nonzero if the K'th field or byte is printable. 294 When returning nonzero, if RANGE_START is non-NULL, 295 set *RANGE_START to true if K is the beginning of a range, and to 296 false otherwise. 
*/ 297 298static bool 299print_kth (size_t k, bool *range_start) 300{ 301 bool k_selected 302 = ((0 < eol_range_start && eol_range_start <= k) 303 || (k <= max_range_endpoint && is_printable_field (k))); 304 305 bool is_selected = k_selected ^ complement; 306 if (range_start && is_selected) 307 *range_start = is_range_start_index (k); 308 309 return is_selected; 310} 311 312/* Comparison function for qsort to order the list of 313 struct range_pairs. */ 314static int 315compare_ranges (const void *a, const void *b) 316{ 317 int a_start = ((const struct range_pair *) a)->lo; 318 int b_start = ((const struct range_pair *) b)->lo; 319 return a_start < b_start ? -1 : a_start > b_start; 320} 321 322/* Given the list of field or byte range specifications FIELDSTR, set 323 MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD 324 array. If there is a right-open-ended range, set EOL_RANGE_START 325 to its starting index. FIELDSTR should be composed of one or more 326 numbers or ranges of numbers, separated by blanks or commas. 327 Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n' 328 through end of line. Return true if FIELDSTR contains at least 329 one field specification, false otherwise. */ 330 331/* FIXME-someday: What if the user wants to cut out the 1,000,000-th 332 field of some huge input file? This function shouldn't have to 333 allocate a table of a million bits just so we can test every 334 field < 10^6 with an array dereference. Instead, consider using 335 an adaptive approach: if the range of selected fields is too large, 336 but only a few fields/byte-offsets are actually selected, use a 337 hash table. If the range of selected fields is too large, and 338 too many are selected, then resort to using the range-pairs (the 339 `rp' array) directly. */ 340 341static bool 342set_fields (const char *fieldstr) 343{ 344 size_t initial = 1; /* Value of first number in a range. 
*/ 345 size_t value = 0; /* If nonzero, a number being accumulated. */ 346 bool lhs_specified = false; 347 bool rhs_specified = false; 348 bool dash_found = false; /* True if a '-' is found in this field. */ 349 bool field_found = false; /* True if at least one field spec 350 has been processed. */ 351 352 struct range_pair *rp = NULL; 353 size_t n_rp = 0; 354 size_t n_rp_allocated = 0; 355 size_t i; 356 bool in_digits = false; 357 358 /* Collect and store in RP the range end points. 359 It also sets EOL_RANGE_START if appropriate. */ 360 361 for (;;) 362 { 363 if (*fieldstr == '-') 364 { 365 in_digits = false; 366 /* Starting a range. */ 367 if (dash_found) 368 FATAL_ERROR (_("invalid byte or field list")); 369 dash_found = true; 370 fieldstr++; 371 372 initial = (lhs_specified ? value : 1); 373 value = 0; 374 } 375 else if (*fieldstr == ',' || 376 isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') 377 { 378 in_digits = false; 379 /* Ending the string, or this field/byte sublist. */ 380 if (dash_found) 381 { 382 dash_found = false; 383 384 if (!lhs_specified && !rhs_specified) 385 FATAL_ERROR (_("invalid range with no endpoint: -")); 386 387 /* A range. Possibilities: -n, m-n, n-. 388 In any case, `initial' contains the start of the range. */ 389 if (!rhs_specified) 390 { 391 /* `n-'. From `initial' to end of line. */ 392 eol_range_start = initial; 393 field_found = true; 394 } 395 else 396 { 397 /* `m-n' or `-n' (1-n). */ 398 if (value < initial) 399 FATAL_ERROR (_("invalid decreasing range")); 400 401 /* Is there already a range going to end of line? */ 402 if (eol_range_start != 0) 403 { 404 /* Yes. Is the new sequence already contained 405 in the old one? If so, no processing is 406 necessary. */ 407 if (initial < eol_range_start) 408 { 409 /* No, the new sequence starts before the 410 old. Does the old range going to end of line 411 extend into the new range? */ 412 if (eol_range_start <= value) 413 { 414 /* Yes. Simply move the end of line marker. 
*/ 415 eol_range_start = initial; 416 } 417 else 418 { 419 /* No. A simple range, before and disjoint from 420 the range going to end of line. Fill it. */ 421 ADD_RANGE_PAIR (rp, initial, value); 422 } 423 424 /* In any case, some fields were selected. */ 425 field_found = true; 426 } 427 } 428 else 429 { 430 /* There is no range going to end of line. */ 431 ADD_RANGE_PAIR (rp, initial, value); 432 field_found = true; 433 } 434 value = 0; 435 } 436 } 437 else 438 { 439 /* A simple field number, not a range. */ 440 ADD_RANGE_PAIR (rp, value, value); 441 value = 0; 442 field_found = true; 443 } 444 445 if (*fieldstr == '\0') 446 { 447 break; 448 } 449 450 fieldstr++; 451 lhs_specified = false; 452 rhs_specified = false; 453 } 454 else if (ISDIGIT (*fieldstr)) 455 { 456 /* Record beginning of digit string, in case we have to 457 complain about it. */ 458 static char const *num_start; 459 if (!in_digits || !num_start) 460 num_start = fieldstr; 461 in_digits = true; 462 463 if (dash_found) 464 rhs_specified = 1; 465 else 466 lhs_specified = 1; 467 468 /* Detect overflow. */ 469 if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)) 470 { 471 /* In case the user specified -c$(echo 2^64|bc),22, 472 complain only about the first number. */ 473 /* Determine the length of the offending number. 
*/ 474 size_t len = strspn (num_start, "0123456789"); 475 char *bad_num = xstrndup (num_start, len); 476 if (operating_mode == byte_mode) 477 error (0, 0, 478 _("byte offset %s is too large"), quote (bad_num)); 479 else 480 error (0, 0, 481 _("field number %s is too large"), quote (bad_num)); 482 free (bad_num); 483 exit (EXIT_FAILURE); 484 } 485 486 fieldstr++; 487 } 488 else 489 FATAL_ERROR (_("invalid byte or field list")); 490 } 491 492 max_range_endpoint = 0; 493 for (i = 0; i < n_rp; i++) 494 { 495 if (rp[i].hi > max_range_endpoint) 496 max_range_endpoint = rp[i].hi; 497 } 498 499 /* Allocate an array large enough so that it may be indexed by 500 the field numbers corresponding to all finite ranges 501 (i.e. `2-6' or `-4', but not `5-') in FIELDSTR. */ 502 503 printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1); 504 505 qsort (rp, n_rp, sizeof (rp[0]), compare_ranges); 506 507 /* Set the array entries corresponding to integers in the ranges of RP. */ 508 for (i = 0; i < n_rp; i++) 509 { 510 size_t j; 511 size_t rsi_candidate; 512 513 /* Record the range-start indices, i.e., record each start 514 index that is not part of any other (lo..hi] range. */ 515 rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo; 516 if (output_delimiter_specified 517 && !is_printable_field (rsi_candidate)) 518 mark_range_start (rsi_candidate); 519 520 for (j = rp[i].lo; j <= rp[i].hi; j++) 521 mark_printable_field (j); 522 } 523 524 if (output_delimiter_specified 525 && !complement 526 && eol_range_start && !is_printable_field (eol_range_start)) 527 mark_range_start (eol_range_start); 528 529 free (rp); 530 531 return field_found; 532} 533 534/* Read from stream STREAM, printing to standard output any selected bytes. */ 535 536static void 537cut_bytes (FILE *stream) 538{ 539 size_t byte_idx; /* Number of bytes in the line so far. */ 540 /* Whether to begin printing delimiters between ranges for the current line. 
541 Set after we've begun printing data corresponding to the first range. */ 542 bool print_delimiter; 543 544 byte_idx = 0; 545 print_delimiter = false; 546 while (1) 547 { 548 int c; /* Each character from the file. */ 549 550 c = getc (stream); 551 552 if (c == '\n') 553 { 554 putchar ('\n'); 555 byte_idx = 0; 556 print_delimiter = false; 557 } 558 else if (c == EOF) 559 { 560 if (byte_idx > 0) 561 putchar ('\n'); 562 break; 563 } 564 else 565 { 566 bool range_start; 567 bool *rs = output_delimiter_specified ? &range_start : NULL; 568 if (print_kth (++byte_idx, rs)) 569 { 570 if (rs && *rs && print_delimiter) 571 { 572 fwrite (output_delimiter_string, sizeof (char), 573 output_delimiter_length, stdout); 574 } 575 print_delimiter = true; 576 putchar (c); 577 } 578 } 579 } 580} 581 582/* Read from stream STREAM, printing to standard output any selected fields. */ 583 584static void 585cut_fields (FILE *stream) 586{ 587 int c; 588 size_t field_idx = 1; 589 bool found_any_selected_field = false; 590 bool buffer_first_field; 591 592 c = getc (stream); 593 if (c == EOF) 594 return; 595 596 ungetc (c, stream); 597 598 /* To support the semantics of the -s flag, we may have to buffer 599 all of the first field to determine whether it is `delimited.' 600 But that is unnecessary if all non-delimited lines must be printed 601 and the first field has been selected, or if non-delimited lines 602 must be suppressed and the first field has *not* been selected. 603 That is because a non-delimited line has exactly one field. 
*/ 604 buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL)); 605 606 while (1) 607 { 608 if (field_idx == 1 && buffer_first_field) 609 { 610 ssize_t len; 611 size_t n_bytes; 612 613 len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, 614 GETNLINE_NO_LIMIT, delim, '\n', stream); 615 if (len < 0) 616 { 617 free (field_1_buffer); 618 field_1_buffer = NULL; 619 if (ferror (stream) || feof (stream)) 620 break; 621 xalloc_die (); 622 } 623 624 n_bytes = len; 625 assert (n_bytes != 0); 626 627 /* If the first field extends to the end of line (it is not 628 delimited) and we are printing all non-delimited lines, 629 print this one. */ 630 if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) 631 { 632 if (suppress_non_delimited) 633 { 634 /* Empty. */ 635 } 636 else 637 { 638 fwrite (field_1_buffer, sizeof (char), n_bytes, stdout); 639 /* Make sure the output line is newline terminated. */ 640 if (field_1_buffer[n_bytes - 1] != '\n') 641 putchar ('\n'); 642 } 643 continue; 644 } 645 if (print_kth (1, NULL)) 646 { 647 /* Print the field, but not the trailing delimiter. */ 648 fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout); 649 found_any_selected_field = true; 650 } 651 ++field_idx; 652 } 653 654 if (c != EOF) 655 { 656 if (print_kth (field_idx, NULL)) 657 { 658 if (found_any_selected_field) 659 { 660 fwrite (output_delimiter_string, sizeof (char), 661 output_delimiter_length, stdout); 662 } 663 found_any_selected_field = true; 664 665 while ((c = getc (stream)) != delim && c != '\n' && c != EOF) 666 { 667 putchar (c); 668 } 669 } 670 else 671 { 672 while ((c = getc (stream)) != delim && c != '\n' && c != EOF) 673 { 674 /* Empty. 
*/ 675 } 676 } 677 } 678 679 if (c == '\n') 680 { 681 c = getc (stream); 682 if (c != EOF) 683 { 684 ungetc (c, stream); 685 c = '\n'; 686 } 687 } 688 689 if (c == delim) 690 ++field_idx; 691 else if (c == '\n' || c == EOF) 692 { 693 if (found_any_selected_field 694 || !(suppress_non_delimited && field_idx == 1)) 695 putchar ('\n'); 696 if (c == EOF) 697 break; 698 field_idx = 1; 699 found_any_selected_field = false; 700 } 701 } 702} 703 704static void 705cut_stream (FILE *stream) 706{ 707 if (operating_mode == byte_mode) 708 cut_bytes (stream); 709 else 710 cut_fields (stream); 711} 712 713/* Process file FILE to standard output. 714 Return true if successful. */ 715 716static bool 717cut_file (char const *file) 718{ 719 FILE *stream; 720 721 if (STREQ (file, "-")) 722 { 723 have_read_stdin = true; 724 stream = stdin; 725 } 726 else 727 { 728 stream = fopen (file, "r"); 729 if (stream == NULL) 730 { 731 error (0, errno, "%s", file); 732 return false; 733 } 734 } 735 736 cut_stream (stream); 737 738 if (ferror (stream)) 739 { 740 error (0, errno, "%s", file); 741 return false; 742 } 743 if (STREQ (file, "-")) 744 clearerr (stream); /* Also clear EOF. */ 745 else if (fclose (stream) == EOF) 746 { 747 error (0, errno, "%s", file); 748 return false; 749 } 750 return true; 751} 752 753int 754main (int argc, char **argv) 755{ 756 int optc; 757 bool ok; 758 bool delim_specified = false; 759 char *spec_list_string IF_LINT(= NULL); 760 761 initialize_main (&argc, &argv); 762 set_program_name (argv[0]); 763 setlocale (LC_ALL, ""); 764 bindtextdomain (PACKAGE, LOCALEDIR); 765 textdomain (PACKAGE); 766 767 atexit (close_stdout); 768 769 operating_mode = undefined_mode; 770 771 /* By default, all non-delimited lines are printed. 
*/ 772 suppress_non_delimited = false; 773 774 delim = '\0'; 775 have_read_stdin = false; 776 777 while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1) 778 { 779 switch (optc) 780 { 781 case 'b': 782 case 'c': 783 /* Build the byte list. */ 784 if (operating_mode != undefined_mode) 785 FATAL_ERROR (_("only one type of list may be specified")); 786 operating_mode = byte_mode; 787 spec_list_string = optarg; 788 break; 789 790 case 'f': 791 /* Build the field list. */ 792 if (operating_mode != undefined_mode) 793 FATAL_ERROR (_("only one type of list may be specified")); 794 operating_mode = field_mode; 795 spec_list_string = optarg; 796 break; 797 798 case 'd': 799 /* New delimiter. */ 800 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */ 801 if (optarg[0] != '\0' && optarg[1] != '\0') 802 FATAL_ERROR (_("the delimiter must be a single character")); 803 delim = optarg[0]; 804 delim_specified = true; 805 break; 806 807 case OUTPUT_DELIMITER_OPTION: 808 output_delimiter_specified = true; 809 /* Interpret --output-delimiter='' to mean 810 `use the NUL byte as the delimiter.' */ 811 output_delimiter_length = (optarg[0] == '\0' 812 ? 
1 : strlen (optarg)); 813 output_delimiter_string = xstrdup (optarg); 814 break; 815 816 case 'n': 817 break; 818 819 case 's': 820 suppress_non_delimited = true; 821 break; 822 823 case COMPLEMENT_OPTION: 824 complement = true; 825 break; 826 827 case_GETOPT_HELP_CHAR; 828 829 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); 830 831 default: 832 usage (EXIT_FAILURE); 833 } 834 } 835 836 if (operating_mode == undefined_mode) 837 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); 838 839 if (delim != '\0' && operating_mode != field_mode) 840 FATAL_ERROR (_("an input delimiter may be specified only\ 841 when operating on fields")); 842 843 if (suppress_non_delimited && operating_mode != field_mode) 844 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ 845\tonly when operating on fields")); 846 847 if (output_delimiter_specified) 848 { 849 range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY, 850 NULL, hash_int, 851 hash_compare_ints, NULL); 852 if (range_start_ht == NULL) 853 xalloc_die (); 854 855 } 856 857 if (! set_fields (spec_list_string)) 858 { 859 if (operating_mode == field_mode) 860 FATAL_ERROR (_("missing list of fields")); 861 else 862 FATAL_ERROR (_("missing list of positions")); 863 } 864 865 if (!delim_specified) 866 delim = '\t'; 867 868 if (output_delimiter_string == NULL) 869 { 870 static char dummy[2]; 871 dummy[0] = delim; 872 dummy[1] = '\0'; 873 output_delimiter_string = dummy; 874 output_delimiter_length = 1; 875 } 876 877 if (optind == argc) 878 ok = cut_file ("-"); 879 else 880 for (ok = true; optind < argc; optind++) 881 ok &= cut_file (argv[optind]); 882 883 if (range_start_ht) 884 hash_free (range_start_ht); 885 886 if (have_read_stdin && fclose (stdin) == EOF) 887 { 888 error (0, errno, "-"); 889 ok = false; 890 } 891 892 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); 893} 894