1/* sort - sort lines of text (with all kinds of options). 2 Copyright (C) 1988, 1991-2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation, either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. 16 17 Written December 1988 by Mike Haertel. 18 The author may be reached (Email) at the address mike@gnu.ai.mit.edu, 19 or (US mail) as Mike Haertel c/o Free Software Foundation. 20 21 Ørn E. Hansen added NLS support in 1997. */ 22 23#include <config.h> 24 25#include <getopt.h> 26#include <sys/types.h> 27#include <sys/wait.h> 28#include <signal.h> 29#include "system.h" 30#include "argmatch.h" 31#include "error.h" 32#include "filevercmp.h" 33#include "hard-locale.h" 34#include "hash.h" 35#include "md5.h" 36#include "physmem.h" 37#include "posixver.h" 38#include "quote.h" 39#include "quotearg.h" 40#include "randread.h" 41#include "readtokens0.h" 42#include "stdio--.h" 43#include "stdlib--.h" 44#include "strnumcmp.h" 45#include "xmemcoll.h" 46#include "xmemxfrm.h" 47#include "xnanosleep.h" 48#include "xstrtol.h" 49 50#if HAVE_SYS_RESOURCE_H 51# include <sys/resource.h> 52#endif 53#ifndef RLIMIT_DATA 54struct rlimit { size_t rlim_cur; }; 55# define getrlimit(Resource, Rlp) (-1) 56#endif 57 58/* The official name of this program (e.g., no `g' prefix). */ 59#define PROGRAM_NAME "sort" 60 61#define AUTHORS \ 62 proper_name ("Mike Haertel"), \ 63 proper_name ("Paul Eggert") 64 65#if HAVE_LANGINFO_CODESET 66# include <langinfo.h> 67#endif 68 69/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is 70 present. */ 71#ifndef SA_NOCLDSTOP 72# define SA_NOCLDSTOP 0 73/* No sigprocmask. Always 'return' zero. */ 74# define sigprocmask(How, Set, Oset) (0) 75# define sigset_t int 76# if ! HAVE_SIGINTERRUPT 77# define siginterrupt(sig, flag) /* empty */ 78# endif 79#endif 80 81#if !defined OPEN_MAX && defined NR_OPEN 82# define OPEN_MAX NR_OPEN 83#endif 84#if !defined OPEN_MAX 85# define OPEN_MAX 20 86#endif 87 88#define UCHAR_LIM (UCHAR_MAX + 1) 89 90#ifndef DEFAULT_TMPDIR 91# define DEFAULT_TMPDIR "/tmp" 92#endif 93 94/* Exit statuses. */ 95enum 96 { 97 /* POSIX says to exit with status 1 if invoked with -c and the 98 input is not properly sorted. */ 99 SORT_OUT_OF_ORDER = 1, 100 101 /* POSIX says any other irregular exit must exit with a status 102 code greater than 1. */ 103 SORT_FAILURE = 2 104 }; 105 106enum 107 { 108 /* The number of times we should try to fork a compression process 109 (we retry if the fork call fails). We don't _need_ to compress 110 temp files, this is just to reduce disk access, so this number 111 can be small. Each retry doubles in duration. */ 112 MAX_FORK_TRIES_COMPRESS = 4, 113 114 /* The number of times we should try to fork a decompression process. 115 If we can't fork a decompression process, we can't sort, so this 116 number should be big. Each retry doubles in duration. */ 117 MAX_FORK_TRIES_DECOMPRESS = 9 118 }; 119 120/* The representation of the decimal point in the current locale. */ 121static int decimal_point; 122 123/* Thousands separator; if -1, then there isn't one. */ 124static int thousands_sep; 125 126/* Nonzero if the corresponding locales are hard. */ 127static bool hard_LC_COLLATE; 128#if HAVE_NL_LANGINFO 129static bool hard_LC_TIME; 130#endif 131 132#define NONZERO(x) ((x) != 0) 133 134/* The kind of blanks for '-b' to skip in various options. */ 135enum blanktype { bl_start, bl_end, bl_both }; 136 137/* The character marking end of line. Default to \n. */ 138static char eolchar = '\n'; 139 140/* Lines are held in core as counted strings. */ 141struct line 142{ 143 char *text; /* Text of the line. */ 144 size_t length; /* Length including final newline. */ 145 char *keybeg; /* Start of first key. */ 146 char *keylim; /* Limit of first key. */ 147}; 148 149/* Input buffers. */ 150struct buffer 151{ 152 char *buf; /* Dynamically allocated buffer, 153 partitioned into 3 regions: 154 - input data; 155 - unused area; 156 - an array of lines, in reverse order. */ 157 size_t used; /* Number of bytes used for input data. */ 158 size_t nlines; /* Number of lines in the line array. */ 159 size_t alloc; /* Number of bytes allocated. */ 160 size_t left; /* Number of bytes left from previous reads. */ 161 size_t line_bytes; /* Number of bytes to reserve for each line. */ 162 bool eof; /* An EOF has been read. */ 163}; 164 165struct keyfield 166{ 167 size_t sword; /* Zero-origin 'word' to start at. */ 168 size_t schar; /* Additional characters to skip. */ 169 size_t eword; /* Zero-origin first word after field. */ 170 size_t echar; /* Additional characters in field. */ 171 bool const *ignore; /* Boolean array of characters to ignore. */ 172 char const *translate; /* Translation applied to characters. */ 173 bool skipsblanks; /* Skip leading blanks when finding start. */ 174 bool skipeblanks; /* Skip leading blanks when finding end. */ 175 bool numeric; /* Flag for numeric comparison. Handle 176 strings of digits with optional decimal 177 point, but no exponential notation. */ 178 bool random; /* Sort by random hash of key. */ 179 bool general_numeric; /* Flag for general, numeric comparison. 180 Handle numbers in exponential notation. */ 181 bool human_numeric; /* Flag for sorting by human readable 182 units with either SI xor IEC prefixes. */ 183 int si_present; /* Flag for checking for mixed SI and IEC. */ 184 bool month; /* Flag for comparison by month name. */ 185 bool reverse; /* Reverse the sense of comparison. */ 186 bool version; /* sort by version number */ 187 struct keyfield *next; /* Next keyfield to try. */ 188}; 189 190struct month 191{ 192 char const *name; 193 int val; 194}; 195 196/* FIXME: None of these tables work with multibyte character sets. 197 Also, there are many other bugs when handling multibyte characters. 198 One way to fix this is to rewrite `sort' to use wide characters 199 internally, but doing this with good performance is a bit 200 tricky. */ 201 202/* Table of blanks. */ 203static bool blanks[UCHAR_LIM]; 204 205/* Table of non-printing characters. */ 206static bool nonprinting[UCHAR_LIM]; 207 208/* Table of non-dictionary characters (not letters, digits, or blanks). */ 209static bool nondictionary[UCHAR_LIM]; 210 211/* Translation table folding lower case to upper. */ 212static char fold_toupper[UCHAR_LIM]; 213 214#define MONTHS_PER_YEAR 12 215 216/* Table mapping month names to integers. 217 Alphabetic order allows binary search. */ 218static struct month monthtab[] = 219{ 220 {"APR", 4}, 221 {"AUG", 8}, 222 {"DEC", 12}, 223 {"FEB", 2}, 224 {"JAN", 1}, 225 {"JUL", 7}, 226 {"JUN", 6}, 227 {"MAR", 3}, 228 {"MAY", 5}, 229 {"NOV", 11}, 230 {"OCT", 10}, 231 {"SEP", 9} 232}; 233 234/* During the merge phase, the number of files to merge at once. */ 235#define NMERGE_DEFAULT 16 236 237/* Minimum size for a merge or check buffer. */ 238#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) 239 240/* Minimum sort size; the code might not work with smaller sizes. */ 241#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE) 242 243/* The number of bytes needed for a merge or check buffer, which can 244 function relatively efficiently even if it holds only one line. If 245 a longer line is seen, this value is increased. */ 246static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024); 247 248/* The approximate maximum number of bytes of main memory to use, as 249 specified by the user. Zero if the user has not specified a size. */ 250static size_t sort_size; 251 252/* The guessed size for non-regular files. */ 253#define INPUT_FILE_SIZE_GUESS (1024 * 1024) 254 255/* Array of directory names in which any temporary files are to be created. */ 256static char const **temp_dirs; 257 258/* Number of temporary directory names used. */ 259static size_t temp_dir_count; 260 261/* Number of allocated slots in temp_dirs. */ 262static size_t temp_dir_alloc; 263 264/* Flag to reverse the order of all comparisons. */ 265static bool reverse; 266 267/* Flag for stable sort. This turns off the last ditch bytewise 268 comparison of lines, and instead leaves lines in the same order 269 they were read if all keys compare equal. */ 270static bool stable; 271 272/* If TAB has this value, blanks separate fields. */ 273enum { TAB_DEFAULT = CHAR_MAX + 1 }; 274 275/* Tab character separating fields. If TAB_DEFAULT, then fields are 276 separated by the empty string between a non-blank character and a blank 277 character. */ 278static int tab = TAB_DEFAULT; 279 280/* Flag to remove consecutive duplicate lines from the output. 281 Only the last of a sequence of equal lines will be output. */ 282static bool unique; 283 284/* Nonzero if any of the input files are the standard input. */ 285static bool have_read_stdin; 286 287/* List of key field comparisons to be tried. */ 288static struct keyfield *keylist; 289 290/* Program used to (de)compress temp files. Must accept -d. */ 291static char const *compress_program; 292 293/* Maximum number of files to merge in one go. If more than this 294 number are present, temp files will be used. */ 295static unsigned int nmerge = NMERGE_DEFAULT; 296 297static void sortlines_temp (struct line *, size_t, struct line *); 298 299/* Report MESSAGE for FILE, then clean up and exit. 300 If FILE is null, it represents standard output. */ 301 302static void die (char const *, char const *) ATTRIBUTE_NORETURN; 303static void 304die (char const *message, char const *file) 305{ 306 error (0, errno, "%s: %s", message, file ? file : _("standard output")); 307 exit (SORT_FAILURE); 308} 309 310void 311usage (int status) 312{ 313 if (status != EXIT_SUCCESS) 314 fprintf (stderr, _("Try `%s --help' for more information.\n"), 315 program_name); 316 else 317 { 318 printf (_("\ 319Usage: %s [OPTION]... [FILE]...\n\ 320 or: %s [OPTION]... --files0-from=F\n\ 321"), 322 program_name, program_name); 323 fputs (_("\ 324Write sorted concatenation of all FILE(s) to standard output.\n\ 325\n\ 326"), stdout); 327 fputs (_("\ 328Mandatory arguments to long options are mandatory for short options too.\n\ 329"), stdout); 330 fputs (_("\ 331Ordering options:\n\ 332\n\ 333"), stdout); 334 fputs (_("\ 335 -b, --ignore-leading-blanks ignore leading blanks\n\ 336 -d, --dictionary-order consider only blanks and alphanumeric characters\n\ 337 -f, --ignore-case fold lower case to upper case characters\n\ 338"), stdout); 339 fputs (_("\ 340 -g, --general-numeric-sort compare according to general numerical value\n\ 341 -i, --ignore-nonprinting consider only printable characters\n\ 342 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\ 343"), stdout); 344 fputs (_("\ 345 -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\ 346"), stdout); 347 fputs (_("\ 348 -n, --numeric-sort compare according to string numerical value\n\ 349 -R, --random-sort sort by random hash of keys\n\ 350 --random-source=FILE get random bytes from FILE\n\ 351 -r, --reverse reverse the result of comparisons\n\ 352"), stdout); 353 fputs (_("\ 354 --sort=WORD sort according to WORD:\n\ 355 general-numeric -g, human-numeric -h, month -M,\n\ 356 numeric -n, random -R, version -V\n\ 357 -V, --version-sort natural sort of (version) numbers within text\n\ 358\n\ 359"), stdout); 360 fputs (_("\ 361Other options:\n\ 362\n\ 363"), stdout); 364 fputs (_("\ 365 --batch-size=NMERGE merge at most NMERGE inputs at once;\n\ 366 for more use temp files\n\ 367"), stdout); 368 fputs (_("\ 369 -c, --check, --check=diagnose-first check for sorted input; do not sort\n\ 370 -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\ 371 --compress-program=PROG compress temporaries with PROG;\n\ 372 decompress them with PROG -d\n\ 373 --files0-from=F read input from the files specified by\n\ 374 NUL-terminated names in file F;\n\ 375 If F is - then read names from standard input\n\ 376"), stdout); 377 fputs (_("\ 378 -k, --key=POS1[,POS2] start a key at POS1 (origin 1), end it at POS2\n\ 379 (default end of line)\n\ 380 -m, --merge merge already sorted files; do not sort\n\ 381"), stdout); 382 fputs (_("\ 383 -o, --output=FILE write result to FILE instead of standard output\n\ 384 -s, --stable stabilize sort by disabling last-resort comparison\n\ 385 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\ 386"), stdout); 387 printf (_("\ 388 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\ 389 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\ 390 multiple options specify multiple directories\n\ 391 -u, --unique with -c, check for strict ordering;\n\ 392 without -c, output only the first of an equal run\n\ 393"), DEFAULT_TMPDIR); 394 fputs (_("\ 395 -z, --zero-terminated end lines with 0 byte, not newline\n\ 396"), stdout); 397 fputs (HELP_OPTION_DESCRIPTION, stdout); 398 fputs (VERSION_OPTION_DESCRIPTION, stdout); 399 fputs (_("\ 400\n\ 401POS is F[.C][OPTS], where F is the field number and C the character position\n\ 402in the field; both are origin 1. If neither -t nor -b is in effect, characters\n\ 403in a field are counted from the beginning of the preceding whitespace. OPTS is\n\ 404one or more single-letter ordering options, which override global ordering\n\ 405options for that key. If no key is given, use the entire line as the key.\n\ 406\n\ 407SIZE may be followed by the following multiplicative suffixes:\n\ 408"), stdout); 409 fputs (_("\ 410% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\ 411\n\ 412With no FILE, or when FILE is -, read standard input.\n\ 413\n\ 414*** WARNING ***\n\ 415The locale specified by the environment affects sort order.\n\ 416Set LC_ALL=C to get the traditional sort order that uses\n\ 417native byte values.\n\ 418"), stdout ); 419 emit_ancillary_info (); 420 } 421 422 exit (status); 423} 424 425/* For long options that have no equivalent short option, use a 426 non-character as a pseudo short option, starting with CHAR_MAX + 1. */ 427enum 428{ 429 CHECK_OPTION = CHAR_MAX + 1, 430 COMPRESS_PROGRAM_OPTION, 431 FILES0_FROM_OPTION, 432 NMERGE_OPTION, 433 RANDOM_SOURCE_OPTION, 434 SORT_OPTION 435}; 436 437static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z"; 438 439static struct option const long_options[] = 440{ 441 {"ignore-leading-blanks", no_argument, NULL, 'b'}, 442 {"check", optional_argument, NULL, CHECK_OPTION}, 443 {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION}, 444 {"dictionary-order", no_argument, NULL, 'd'}, 445 {"ignore-case", no_argument, NULL, 'f'}, 446 {"files0-from", required_argument, NULL, FILES0_FROM_OPTION}, 447 {"general-numeric-sort", no_argument, NULL, 'g'}, 448 {"ignore-nonprinting", no_argument, NULL, 'i'}, 449 {"key", required_argument, NULL, 'k'}, 450 {"merge", no_argument, NULL, 'm'}, 451 {"month-sort", no_argument, NULL, 'M'}, 452 {"numeric-sort", no_argument, NULL, 'n'}, 453 {"human-numeric-sort", no_argument, NULL, 'h'}, 454 {"version-sort", no_argument, NULL, 'V'}, 455 {"random-sort", no_argument, NULL, 'R'}, 456 {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION}, 457 {"sort", required_argument, NULL, SORT_OPTION}, 458 {"output", required_argument, NULL, 'o'}, 459 {"reverse", no_argument, NULL, 'r'}, 460 {"stable", no_argument, NULL, 's'}, 461 {"batch-size", required_argument, NULL, NMERGE_OPTION}, 462 {"buffer-size", required_argument, NULL, 'S'}, 463 {"field-separator", required_argument, NULL, 't'}, 464 {"temporary-directory", required_argument, NULL, 'T'}, 465 {"unique", no_argument, NULL, 'u'}, 466 {"zero-terminated", no_argument, NULL, 'z'}, 467 {GETOPT_HELP_OPTION_DECL}, 468 {GETOPT_VERSION_OPTION_DECL}, 469 {NULL, 0, NULL, 0}, 470}; 471 472#define CHECK_TABLE \ 473 _ct_("quiet", 'C') \ 474 _ct_("silent", 'C') \ 475 _ct_("diagnose-first", 'c') 476 477static char const *const check_args[] = 478{ 479#define _ct_(_s, _c) _s, 480 CHECK_TABLE NULL 481#undef _ct_ 482}; 483static char const check_types[] = 484{ 485#define _ct_(_s, _c) _c, 486 CHECK_TABLE 487#undef _ct_ 488}; 489 490#define SORT_TABLE \ 491 _st_("general-numeric", 'g') \ 492 _st_("human-numeric", 'h') \ 493 _st_("month", 'M') \ 494 _st_("numeric", 'n') \ 495 _st_("random", 'R') \ 496 _st_("version", 'V') 497 498static char const *const sort_args[] = 499{ 500#define _st_(_s, _c) _s, 501 SORT_TABLE NULL 502#undef _st_ 503}; 504static char const sort_types[] = 505{ 506#define _st_(_s, _c) _c, 507 SORT_TABLE 508#undef _st_ 509}; 510 511/* The set of signals that are caught. */ 512static sigset_t caught_signals; 513 514/* Critical section status. */ 515struct cs_status 516{ 517 bool valid; 518 sigset_t sigs; 519}; 520 521/* Enter a critical section. */ 522static struct cs_status 523cs_enter (void) 524{ 525 struct cs_status status; 526 status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0); 527 return status; 528} 529 530/* Leave a critical section. */ 531static void 532cs_leave (struct cs_status status) 533{ 534 if (status.valid) 535 { 536 /* Ignore failure when restoring the signal mask. */ 537 sigprocmask (SIG_SETMASK, &status.sigs, NULL); 538 } 539} 540 541/* The list of temporary files. */ 542struct tempnode 543{ 544 struct tempnode *volatile next; 545 pid_t pid; /* If compressed, the pid of compressor, else zero */ 546 char name[1]; /* Actual size is 1 + file name length. */ 547}; 548static struct tempnode *volatile temphead; 549static struct tempnode *volatile *temptail = &temphead; 550 551struct sortfile 552{ 553 char const *name; 554 pid_t pid; /* If compressed, the pid of compressor, else zero */ 555}; 556 557/* A table where we store compression process states. We clean up all 558 processes in a timely manner so as not to exhaust system resources, 559 so we store the info on whether the process is still running, or has 560 been reaped here. */ 561static Hash_table *proctab; 562 563enum { INIT_PROCTAB_SIZE = 47 }; 564 565enum procstate { ALIVE, ZOMBIE }; 566 567/* A proctab entry. The COUNT field is there in case we fork a new 568 compression process that has the same PID as an old zombie process 569 that is still in the table (because the process to decompress the 570 temp file it was associated with hasn't started yet). */ 571struct procnode 572{ 573 pid_t pid; 574 enum procstate state; 575 size_t count; 576}; 577 578static size_t 579proctab_hasher (const void *entry, size_t tabsize) 580{ 581 const struct procnode *node = entry; 582 return node->pid % tabsize; 583} 584 585static bool 586proctab_comparator (const void *e1, const void *e2) 587{ 588 const struct procnode *n1 = e1, *n2 = e2; 589 return n1->pid == n2->pid; 590} 591 592/* The total number of forked processes (compressors and decompressors) 593 that have not been reaped yet. */ 594static size_t nprocs; 595 596/* The number of child processes we'll allow before we try to reap some. */ 597enum { MAX_PROCS_BEFORE_REAP = 2 }; 598 599/* If 0 < PID, wait for the child process with that PID to exit. 600 If PID is -1, clean up a random child process which has finished and 601 return the process ID of that child. If PID is -1 and no processes 602 have quit yet, return 0 without waiting. */ 603 604static pid_t 605reap (pid_t pid) 606{ 607 int status; 608 pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0); 609 610 if (cpid < 0) 611 error (SORT_FAILURE, errno, _("waiting for %s [-d]"), 612 compress_program); 613 else if (0 < cpid) 614 { 615 if (! WIFEXITED (status) || WEXITSTATUS (status)) 616 error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"), 617 compress_program); 618 --nprocs; 619 } 620 621 return cpid; 622} 623 624/* Add the PID of a running compression process to proctab, or update 625 the entry COUNT and STATE fields if it's already there. This also 626 creates the table for us the first time it's called. */ 627 628static void 629register_proc (pid_t pid) 630{ 631 struct procnode test, *node; 632 633 if (! proctab) 634 { 635 proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL, 636 proctab_hasher, 637 proctab_comparator, 638 free); 639 if (! proctab) 640 xalloc_die (); 641 } 642 643 test.pid = pid; 644 node = hash_lookup (proctab, &test); 645 if (node) 646 { 647 node->state = ALIVE; 648 ++node->count; 649 } 650 else 651 { 652 node = xmalloc (sizeof *node); 653 node->pid = pid; 654 node->state = ALIVE; 655 node->count = 1; 656 if (hash_insert (proctab, node) == NULL) 657 xalloc_die (); 658 } 659} 660 661/* This is called when we reap a random process. We don't know 662 whether we have reaped a compression process or a decompression 663 process until we look in the table. If there's an ALIVE entry for 664 it, then we have reaped a compression process, so change the state 665 to ZOMBIE. Otherwise, it's a decompression processes, so ignore it. */ 666 667static void 668update_proc (pid_t pid) 669{ 670 struct procnode test, *node; 671 672 test.pid = pid; 673 node = hash_lookup (proctab, &test); 674 if (node) 675 node->state = ZOMBIE; 676} 677 678/* This is for when we need to wait for a compression process to exit. 679 If it has a ZOMBIE entry in the table then it's already dead and has 680 been reaped. Note that if there's an ALIVE entry for it, it still may 681 already have died and been reaped if a second process was created with 682 the same PID. This is probably exceedingly rare, but to be on the safe 683 side we will have to wait for any compression process with this PID. */ 684 685static void 686wait_proc (pid_t pid) 687{ 688 struct procnode test, *node; 689 690 test.pid = pid; 691 node = hash_lookup (proctab, &test); 692 if (node->state == ALIVE) 693 reap (pid); 694 695 node->state = ZOMBIE; 696 if (! --node->count) 697 { 698 hash_delete (proctab, node); 699 free (node); 700 } 701} 702 703/* Keep reaping finished children as long as there are more to reap. 704 This doesn't block waiting for any of them, it only reaps those 705 that are already dead. */ 706 707static void 708reap_some (void) 709{ 710 pid_t pid; 711 712 while (0 < nprocs && (pid = reap (-1))) 713 update_proc (pid); 714} 715 716/* Clean up any remaining temporary files. */ 717 718static void 719cleanup (void) 720{ 721 struct tempnode const *node; 722 723 for (node = temphead; node; node = node->next) 724 unlink (node->name); 725 temphead = NULL; 726} 727 728/* Cleanup actions to take when exiting. */ 729 730static void 731exit_cleanup (void) 732{ 733 if (temphead) 734 { 735 /* Clean up any remaining temporary files in a critical section so 736 that a signal handler does not try to clean them too. */ 737 struct cs_status cs = cs_enter (); 738 cleanup (); 739 cs_leave (cs); 740 } 741 742 close_stdout (); 743} 744 745/* Create a new temporary file, returning its newly allocated tempnode. 746 Store into *PFD the file descriptor open for writing. 747 If the creation fails, return NULL and store -1 into *PFD if the 748 failure is due to file descriptor exhaustion and 749 SURVIVE_FD_EXHAUSTION; otherwise, die. */ 750 751static struct tempnode * 752create_temp_file (int *pfd, bool survive_fd_exhaustion) 753{ 754 static char const slashbase[] = "/sortXXXXXX"; 755 static size_t temp_dir_index; 756 int fd; 757 int saved_errno; 758 char const *temp_dir = temp_dirs[temp_dir_index]; 759 size_t len = strlen (temp_dir); 760 struct tempnode *node = 761 xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase); 762 char *file = node->name; 763 struct cs_status cs; 764 765 memcpy (file, temp_dir, len); 766 memcpy (file + len, slashbase, sizeof slashbase); 767 node->next = NULL; 768 node->pid = 0; 769 if (++temp_dir_index == temp_dir_count) 770 temp_dir_index = 0; 771 772 /* Create the temporary file in a critical section, to avoid races. */ 773 cs = cs_enter (); 774 fd = mkstemp (file); 775 if (0 <= fd) 776 { 777 *temptail = node; 778 temptail = &node->next; 779 } 780 saved_errno = errno; 781 cs_leave (cs); 782 errno = saved_errno; 783 784 if (fd < 0) 785 { 786 if (! (survive_fd_exhaustion && errno == EMFILE)) 787 error (SORT_FAILURE, errno, _("cannot create temporary file in %s"), 788 quote (temp_dir)); 789 free (node); 790 node = NULL; 791 } 792 793 *pfd = fd; 794 return node; 795} 796 797/* Return a stream for FILE, opened with mode HOW. A null FILE means 798 standard output; HOW should be "w". When opening for input, "-" 799 means standard input. To avoid confusion, do not return file 800 descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when 801 opening an ordinary FILE. Return NULL if unsuccessful. */ 802 803static FILE * 804stream_open (const char *file, const char *how) 805{ 806 if (!file) 807 return stdout; 808 if (STREQ (file, "-") && *how == 'r') 809 { 810 have_read_stdin = true; 811 return stdin; 812 } 813 return fopen (file, how); 814} 815 816/* Same as stream_open, except always return a non-null value; die on 817 failure. */ 818 819static FILE * 820xfopen (const char *file, const char *how) 821 { 822 FILE *fp = stream_open (file, how); 823 if (!fp) 824 die (_("open failed"), file); 825 return fp; 826} 827 828/* Close FP, whose name is FILE, and report any errors. */ 829 830static void 831xfclose (FILE *fp, char const *file) 832{ 833 switch (fileno (fp)) 834 { 835 case STDIN_FILENO: 836 /* Allow reading stdin from tty more than once. */ 837 if (feof (fp)) 838 clearerr (fp); 839 break; 840 841 case STDOUT_FILENO: 842 /* Don't close stdout just yet. close_stdout does that. */ 843 if (fflush (fp) != 0) 844 die (_("fflush failed"), file); 845 break; 846 847 default: 848 if (fclose (fp) != 0) 849 die (_("close failed"), file); 850 break; 851 } 852} 853 854static void 855dup2_or_die (int oldfd, int newfd) 856{ 857 if (dup2 (oldfd, newfd) < 0) 858 error (SORT_FAILURE, errno, _("dup2 failed")); 859} 860 861/* Fork a child process for piping to and do common cleanup. The 862 TRIES parameter tells us how many times to try to fork before 863 giving up. Return the PID of the child, or -1 (setting errno) 864 on failure. */ 865 866static pid_t 867pipe_fork (int pipefds[2], size_t tries) 868{ 869#if HAVE_WORKING_FORK 870 struct tempnode *saved_temphead; 871 int saved_errno; 872 double wait_retry = 0.25; 873 pid_t pid IF_LINT (= -1); 874 struct cs_status cs; 875 876 if (pipe (pipefds) < 0) 877 return -1; 878 879 while (tries--) 880 { 881 /* This is so the child process won't delete our temp files 882 if it receives a signal before exec-ing. */ 883 cs = cs_enter (); 884 saved_temphead = temphead; 885 temphead = NULL; 886 887 pid = fork (); 888 saved_errno = errno; 889 if (pid) 890 temphead = saved_temphead; 891 892 cs_leave (cs); 893 errno = saved_errno; 894 895 if (0 <= pid || errno != EAGAIN) 896 break; 897 else 898 { 899 xnanosleep (wait_retry); 900 wait_retry *= 2; 901 reap_some (); 902 } 903 } 904 905 if (pid < 0) 906 { 907 saved_errno = errno; 908 close (pipefds[0]); 909 close (pipefds[1]); 910 errno = saved_errno; 911 } 912 else if (pid == 0) 913 { 914 close (STDIN_FILENO); 915 close (STDOUT_FILENO); 916 } 917 else 918 ++nprocs; 919 920 return pid; 921 922#else /* ! HAVE_WORKING_FORK */ 923 return -1; 924#endif 925} 926 927/* Create a temporary file and start a compression program to filter output 928 to that file. Set *PFP to the file handle and if PPID is non-NULL, 929 set *PPID to the PID of the newly-created process. If the creation 930 fails, return NULL if the failure is due to file descriptor 931 exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */ 932 933static char * 934maybe_create_temp (FILE **pfp, pid_t *ppid, bool survive_fd_exhaustion) 935{ 936 int tempfd; 937 struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion); 938 char *name; 939 940 if (! node) 941 return NULL; 942 943 name = node->name; 944 945 if (compress_program) 946 { 947 int pipefds[2]; 948 949 node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS); 950 if (0 < node->pid) 951 { 952 close (tempfd); 953 close (pipefds[0]); 954 tempfd = pipefds[1]; 955 956 register_proc (node->pid); 957 } 958 else if (node->pid == 0) 959 { 960 close (pipefds[1]); 961 dup2_or_die (tempfd, STDOUT_FILENO); 962 close (tempfd); 963 dup2_or_die (pipefds[0], STDIN_FILENO); 964 close (pipefds[0]); 965 966 if (execlp (compress_program, compress_program, (char *) NULL) < 0) 967 error (SORT_FAILURE, errno, _("couldn't execute %s"), 968 compress_program); 969 } 970 else 971 node->pid = 0; 972 } 973 974 *pfp = fdopen (tempfd, "w"); 975 if (! *pfp) 976 die (_("couldn't create temporary file"), name); 977 978 if (ppid) 979 *ppid = node->pid; 980 981 return name; 982} 983 984/* Create a temporary file and start a compression program to filter output 985 to that file. Set *PFP to the file handle and if *PPID is non-NULL, 986 set it to the PID of the newly-created process. Die on failure. */ 987 988static char * 989create_temp (FILE **pfp, pid_t *ppid) 990{ 991 return maybe_create_temp (pfp, ppid, false); 992} 993 994/* Open a compressed temp file and start a decompression process through 995 which to filter the input. PID must be the valid processes ID of the 996 process used to compress the file. Return NULL (setting errno to 997 EMFILE) if we ran out of file descriptors, and die on any other 998 kind of failure. */ 999 1000static FILE * 1001open_temp (const char *name, pid_t pid) 1002{ 1003 int tempfd, pipefds[2]; 1004 FILE *fp = NULL; 1005 1006 wait_proc (pid); 1007 1008 tempfd = open (name, O_RDONLY); 1009 if (tempfd < 0) 1010 return NULL; 1011 1012 switch (pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS)) 1013 { 1014 case -1: 1015 if (errno != EMFILE) 1016 error (SORT_FAILURE, errno, _("couldn't create process for %s -d"), 1017 compress_program); 1018 close (tempfd); 1019 errno = EMFILE; 1020 break; 1021 1022 case 0: 1023 close (pipefds[0]); 1024 dup2_or_die (tempfd, STDIN_FILENO); 1025 close (tempfd); 1026 dup2_or_die (pipefds[1], STDOUT_FILENO); 1027 close (pipefds[1]); 1028 1029 execlp (compress_program, compress_program, "-d", (char *) NULL); 1030 error (SORT_FAILURE, errno, _("couldn't execute %s -d"), 1031 compress_program); 1032 1033 default: 1034 close (tempfd); 1035 close (pipefds[1]); 1036 1037 fp = fdopen (pipefds[0], "r"); 1038 if (! fp) 1039 { 1040 int saved_errno = errno; 1041 close (pipefds[0]); 1042 errno = saved_errno; 1043 } 1044 break; 1045 } 1046 1047 return fp; 1048} 1049 1050static void 1051write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file) 1052{ 1053 if (fwrite (buf, 1, n_bytes, fp) != n_bytes) 1054 die (_("write failed"), output_file); 1055} 1056 1057/* Append DIR to the array of temporary directory names. */ 1058static void 1059add_temp_dir (char const *dir) 1060{ 1061 if (temp_dir_count == temp_dir_alloc) 1062 temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc); 1063 1064 temp_dirs[temp_dir_count++] = dir; 1065} 1066 1067/* Remove NAME from the list of temporary files. */ 1068 1069static void 1070zaptemp (const char *name) 1071{ 1072 struct tempnode *volatile *pnode; 1073 struct tempnode *node; 1074 struct tempnode *next; 1075 int unlink_status; 1076 int unlink_errno = 0; 1077 struct cs_status cs; 1078 1079 for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next) 1080 continue; 1081 1082 /* Unlink the temporary file in a critical section to avoid races. */ 1083 next = node->next; 1084 cs = cs_enter (); 1085 unlink_status = unlink (name); 1086 unlink_errno = errno; 1087 *pnode = next; 1088 cs_leave (cs); 1089 1090 if (unlink_status != 0) 1091 error (0, unlink_errno, _("warning: cannot remove: %s"), name); 1092 if (! next) 1093 temptail = pnode; 1094 free (node); 1095} 1096 1097#if HAVE_NL_LANGINFO 1098 1099static int 1100struct_month_cmp (const void *m1, const void *m2) 1101{ 1102 struct month const *month1 = m1; 1103 struct month const *month2 = m2; 1104 return strcmp (month1->name, month2->name); 1105} 1106 1107#endif 1108 1109/* Initialize the character class tables. */ 1110 1111static void 1112inittables (void) 1113{ 1114 size_t i; 1115 1116 for (i = 0; i < UCHAR_LIM; ++i) 1117 { 1118 blanks[i] = !! isblank (i); 1119 nonprinting[i] = ! isprint (i); 1120 nondictionary[i] = ! isalnum (i) && ! isblank (i); 1121 fold_toupper[i] = toupper (i); 1122 } 1123 1124#if HAVE_NL_LANGINFO 1125 /* If we're not in the "C" locale, read different names for months. */ 1126 if (hard_LC_TIME) 1127 { 1128 for (i = 0; i < MONTHS_PER_YEAR; i++) 1129 { 1130 char const *s; 1131 size_t s_len; 1132 size_t j; 1133 char *name; 1134 1135 s = (char *) nl_langinfo (ABMON_1 + i); 1136 s_len = strlen (s); 1137 monthtab[i].name = name = xmalloc (s_len + 1); 1138 monthtab[i].val = i + 1; 1139 1140 for (j = 0; j < s_len; j++) 1141 name[j] = fold_toupper[to_uchar (s[j])]; 1142 name[j] = '\0'; 1143 } 1144 qsort ((void *) monthtab, MONTHS_PER_YEAR, 1145 sizeof *monthtab, struct_month_cmp); 1146 } 1147#endif 1148} 1149 1150/* Specify how many inputs may be merged at once. 1151 This may be set on the command-line with the 1152 --batch-size option. */ 1153static void 1154specify_nmerge (int oi, char c, char const *s) 1155{ 1156 uintmax_t n; 1157 struct rlimit rlimit; 1158 enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL); 1159 1160 /* Try to find out how many file descriptors we'll be able 1161 to open. We need at least nmerge + 3 (STDIN_FILENO, 1162 STDOUT_FILENO and STDERR_FILENO). */ 1163 unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0 1164 ? rlimit.rlim_cur 1165 : OPEN_MAX) 1166 - 3); 1167 1168 if (e == LONGINT_OK) 1169 { 1170 nmerge = n; 1171 if (nmerge != n) 1172 e = LONGINT_OVERFLOW; 1173 else 1174 { 1175 if (nmerge < 2) 1176 { 1177 error (0, 0, _("invalid --%s argument %s"), 1178 long_options[oi].name, quote(s)); 1179 error (SORT_FAILURE, 0, 1180 _("minimum --%s argument is %s"), 1181 long_options[oi].name, quote("2")); 1182 } 1183 else if (max_nmerge < nmerge) 1184 { 1185 e = LONGINT_OVERFLOW; 1186 } 1187 else 1188 return; 1189 } 1190 } 1191 1192 if (e == LONGINT_OVERFLOW) 1193 { 1194 char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)]; 1195 error (0, 0, _("--%s argument %s too large"), 1196 long_options[oi].name, quote(s)); 1197 error (SORT_FAILURE, 0, 1198 _("maximum --%s argument with current rlimit is %s"), 1199 long_options[oi].name, 1200 uinttostr (max_nmerge, max_nmerge_buf)); 1201 } 1202 else 1203 xstrtol_fatal (e, oi, c, long_options, s); 1204} 1205 1206/* Specify the amount of main memory to use when sorting. */ 1207static void 1208specify_sort_size (int oi, char c, char const *s) 1209{ 1210 uintmax_t n; 1211 char *suffix; 1212 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ"); 1213 1214 /* The default unit is KiB. */ 1215 if (e == LONGINT_OK && ISDIGIT (suffix[-1])) 1216 { 1217 if (n <= UINTMAX_MAX / 1024) 1218 n *= 1024; 1219 else 1220 e = LONGINT_OVERFLOW; 1221 } 1222 1223 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */ 1224 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1]) 1225 switch (suffix[0]) 1226 { 1227 case 'b': 1228 e = LONGINT_OK; 1229 break; 1230 1231 case '%': 1232 { 1233 double mem = physmem_total () * n / 100; 1234 1235 /* Use "<", not "<=", to avoid problems with rounding. */ 1236 if (mem < UINTMAX_MAX) 1237 { 1238 n = mem; 1239 e = LONGINT_OK; 1240 } 1241 else 1242 e = LONGINT_OVERFLOW; 1243 } 1244 break; 1245 } 1246 1247 if (e == LONGINT_OK) 1248 { 1249 /* If multiple sort sizes are specified, take the maximum, so 1250 that option order does not matter. */ 1251 if (n < sort_size) 1252 return; 1253 1254 sort_size = n; 1255 if (sort_size == n) 1256 { 1257 sort_size = MAX (sort_size, MIN_SORT_SIZE); 1258 return; 1259 } 1260 1261 e = LONGINT_OVERFLOW; 1262 } 1263 1264 xstrtol_fatal (e, oi, c, long_options, s); 1265} 1266 1267/* Return the default sort size. */ 1268static size_t 1269default_sort_size (void) 1270{ 1271 /* Let MEM be available memory or 1/8 of total memory, whichever 1272 is greater. */ 1273 double avail = physmem_available (); 1274 double total = physmem_total (); 1275 double mem = MAX (avail, total / 8); 1276 struct rlimit rlimit; 1277 1278 /* Let SIZE be MEM, but no more than the maximum object size or 1279 system resource limits. Avoid the MIN macro here, as it is not 1280 quite right when only one argument is floating point. Don't 1281 bother to check for values like RLIM_INFINITY since in practice 1282 they are not much less than SIZE_MAX. */ 1283 size_t size = SIZE_MAX; 1284 if (mem < size) 1285 size = mem; 1286 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) 1287 size = rlimit.rlim_cur; 1288#ifdef RLIMIT_AS 1289 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size) 1290 size = rlimit.rlim_cur; 1291#endif 1292 1293 /* Leave a large safety margin for the above limits, as failure can 1294 occur when they are exceeded. */ 1295 size /= 2; 1296 1297#ifdef RLIMIT_RSS 1298 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc. 1299 Exceeding RSS is not fatal, but can be quite slow. */ 1300 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size) 1301 size = rlimit.rlim_cur / 16 * 15; 1302#endif 1303 1304 /* Use no less than the minimum. */ 1305 return MAX (size, MIN_SORT_SIZE); 1306} 1307 1308/* Return the sort buffer size to use with the input files identified 1309 by FPS and FILES, which are alternate names of the same files. 1310 NFILES gives the number of input files; NFPS may be less. Assume 1311 that each input line requires LINE_BYTES extra bytes' worth of line 1312 information. Do not exceed the size bound specified by the user 1313 (or a default size bound, if the user does not specify one). */ 1314 1315static size_t 1316sort_buffer_size (FILE *const *fps, size_t nfps, 1317 char *const *files, size_t nfiles, 1318 size_t line_bytes) 1319{ 1320 /* A bound on the input size. If zero, the bound hasn't been 1321 determined yet. */ 1322 static size_t size_bound; 1323 1324 /* In the worst case, each input byte is a newline. */ 1325 size_t worst_case_per_input_byte = line_bytes + 1; 1326 1327 /* Keep enough room for one extra input line and an extra byte. 1328 This extra room might be needed when preparing to read EOF. */ 1329 size_t size = worst_case_per_input_byte + 1; 1330 1331 size_t i; 1332 1333 for (i = 0; i < nfiles; i++) 1334 { 1335 struct stat st; 1336 off_t file_size; 1337 size_t worst_case; 1338 1339 if ((i < nfps ? fstat (fileno (fps[i]), &st) 1340 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st) 1341 : stat (files[i], &st)) 1342 != 0) 1343 die (_("stat failed"), files[i]); 1344 1345 if (S_ISREG (st.st_mode)) 1346 file_size = st.st_size; 1347 else 1348 { 1349 /* The file has unknown size. If the user specified a sort 1350 buffer size, use that; otherwise, guess the size. */ 1351 if (sort_size) 1352 return sort_size; 1353 file_size = INPUT_FILE_SIZE_GUESS; 1354 } 1355 1356 if (! size_bound) 1357 { 1358 size_bound = sort_size; 1359 if (! size_bound) 1360 size_bound = default_sort_size (); 1361 } 1362 1363 /* Add the amount of memory needed to represent the worst case 1364 where the input consists entirely of newlines followed by a 1365 single non-newline. Check for overflow. */ 1366 worst_case = file_size * worst_case_per_input_byte + 1; 1367 if (file_size != worst_case / worst_case_per_input_byte 1368 || size_bound - size <= worst_case) 1369 return size_bound; 1370 size += worst_case; 1371 } 1372 1373 return size; 1374} 1375 1376/* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES 1377 must be at least sizeof (struct line). Allocate ALLOC bytes 1378 initially. */ 1379 1380static void 1381initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) 1382{ 1383 /* Ensure that the line array is properly aligned. If the desired 1384 size cannot be allocated, repeatedly halve it until allocation 1385 succeeds. The smaller allocation may hurt overall performance, 1386 but that's better than failing. */ 1387 for (;;) 1388 { 1389 alloc += sizeof (struct line) - alloc % sizeof (struct line); 1390 buf->buf = malloc (alloc); 1391 if (buf->buf) 1392 break; 1393 alloc /= 2; 1394 if (alloc <= line_bytes + 1) 1395 xalloc_die (); 1396 } 1397 1398 buf->line_bytes = line_bytes; 1399 buf->alloc = alloc; 1400 buf->used = buf->left = buf->nlines = 0; 1401 buf->eof = false; 1402} 1403 1404/* Return one past the limit of the line array. */ 1405 1406static inline struct line * 1407buffer_linelim (struct buffer const *buf) 1408{ 1409 return (struct line *) (buf->buf + buf->alloc); 1410} 1411 1412/* Return a pointer to the first character of the field specified 1413 by KEY in LINE. */ 1414 1415static char * 1416begfield (const struct line *line, const struct keyfield *key) 1417{ 1418 char *ptr = line->text, *lim = ptr + line->length - 1; 1419 size_t sword = key->sword; 1420 size_t schar = key->schar; 1421 1422 /* The leading field separator itself is included in a field when -t 1423 is absent. */ 1424 1425 if (tab != TAB_DEFAULT) 1426 while (ptr < lim && sword--) 1427 { 1428 while (ptr < lim && *ptr != tab) 1429 ++ptr; 1430 if (ptr < lim) 1431 ++ptr; 1432 } 1433 else 1434 while (ptr < lim && sword--) 1435 { 1436 while (ptr < lim && blanks[to_uchar (*ptr)]) 1437 ++ptr; 1438 while (ptr < lim && !blanks[to_uchar (*ptr)]) 1439 ++ptr; 1440 } 1441 1442 /* If we're ignoring leading blanks when computing the Start 1443 of the field, skip past them here. */ 1444 if (key->skipsblanks) 1445 while (ptr < lim && blanks[to_uchar (*ptr)]) 1446 ++ptr; 1447 1448 /* Advance PTR by SCHAR (if possible), but no further than LIM. */ 1449 ptr = MIN (lim, ptr + schar); 1450 1451 return ptr; 1452} 1453 1454/* Return the limit of (a pointer to the first character after) the field 1455 in LINE specified by KEY. */ 1456 1457static char * 1458limfield (const struct line *line, const struct keyfield *key) 1459{ 1460 char *ptr = line->text, *lim = ptr + line->length - 1; 1461 size_t eword = key->eword, echar = key->echar; 1462 1463 if (echar == 0) 1464 eword++; /* Skip all of end field. */ 1465 1466 /* Move PTR past EWORD fields or to one past the last byte on LINE, 1467 whichever comes first. If there are more than EWORD fields, leave 1468 PTR pointing at the beginning of the field having zero-based index, 1469 EWORD. If a delimiter character was specified (via -t), then that 1470 `beginning' is the first character following the delimiting TAB. 1471 Otherwise, leave PTR pointing at the first `blank' character after 1472 the preceding field. */ 1473 if (tab != TAB_DEFAULT) 1474 while (ptr < lim && eword--) 1475 { 1476 while (ptr < lim && *ptr != tab) 1477 ++ptr; 1478 if (ptr < lim && (eword || echar)) 1479 ++ptr; 1480 } 1481 else 1482 while (ptr < lim && eword--) 1483 { 1484 while (ptr < lim && blanks[to_uchar (*ptr)]) 1485 ++ptr; 1486 while (ptr < lim && !blanks[to_uchar (*ptr)]) 1487 ++ptr; 1488 } 1489 1490#ifdef POSIX_UNSPECIFIED 1491 /* The following block of code makes GNU sort incompatible with 1492 standard Unix sort, so it's ifdef'd out for now. 1493 The POSIX spec isn't clear on how to interpret this. 1494 FIXME: request clarification. 1495 1496 From: kwzh@gnu.ai.mit.edu (Karl Heuer) 1497 Date: Thu, 30 May 96 12:20:41 -0400 1498 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.] 1499 1500 [...]I believe I've found another bug in `sort'. 1501 1502 $ cat /tmp/sort.in 1503 a b c 2 d 1504 pq rs 1 t 1505 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in 1506 a b c 2 d 1507 pq rs 1 t 1508 $ /bin/sort -k1.7,1.7 </tmp/sort.in 1509 pq rs 1 t 1510 a b c 2 d 1511 1512 Unix sort produced the answer I expected: sort on the single character 1513 in column 7. GNU sort produced different results, because it disagrees 1514 on the interpretation of the key-end spec "M.N". Unix sort reads this 1515 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean 1516 "skip M-1 fields, then either N-1 characters or the rest of the current 1517 field, whichever comes first". This extra clause applies only to 1518 key-ends, not key-starts. 1519 */ 1520 1521 /* Make LIM point to the end of (one byte past) the current field. */ 1522 if (tab != TAB_DEFAULT) 1523 { 1524 char *newlim; 1525 newlim = memchr (ptr, tab, lim - ptr); 1526 if (newlim) 1527 lim = newlim; 1528 } 1529 else 1530 { 1531 char *newlim; 1532 newlim = ptr; 1533 while (newlim < lim && blanks[to_uchar (*newlim)]) 1534 ++newlim; 1535 while (newlim < lim && !blanks[to_uchar (*newlim)]) 1536 ++newlim; 1537 lim = newlim; 1538 } 1539#endif 1540 1541 if (echar != 0) /* We need to skip over a portion of the end field. */ 1542 { 1543 /* If we're ignoring leading blanks when computing the End 1544 of the field, skip past them here. */ 1545 if (key->skipeblanks) 1546 while (ptr < lim && blanks[to_uchar (*ptr)]) 1547 ++ptr; 1548 1549 /* Advance PTR by ECHAR (if possible), but no further than LIM. */ 1550 ptr = MIN (lim, ptr + echar); 1551 } 1552 1553 return ptr; 1554} 1555 1556/* Fill BUF reading from FP, moving buf->left bytes from the end 1557 of buf->buf to the beginning first. If EOF is reached and the 1558 file wasn't terminated by a newline, supply one. Set up BUF's line 1559 table too. FILE is the name of the file corresponding to FP. 1560 Return true if some input was read. */ 1561 1562static bool 1563fillbuf (struct buffer *buf, FILE *fp, char const *file) 1564{ 1565 struct keyfield const *key = keylist; 1566 char eol = eolchar; 1567 size_t line_bytes = buf->line_bytes; 1568 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; 1569 1570 if (buf->eof) 1571 return false; 1572 1573 if (buf->used != buf->left) 1574 { 1575 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); 1576 buf->used = buf->left; 1577 buf->nlines = 0; 1578 } 1579 1580 for (;;) 1581 { 1582 char *ptr = buf->buf + buf->used; 1583 struct line *linelim = buffer_linelim (buf); 1584 struct line *line = linelim - buf->nlines; 1585 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; 1586 char *line_start = buf->nlines ? line->text + line->length : buf->buf; 1587 1588 while (line_bytes + 1 < avail) 1589 { 1590 /* Read as many bytes as possible, but do not read so many 1591 bytes that there might not be enough room for the 1592 corresponding line array. The worst case is when the 1593 rest of the input file consists entirely of newlines, 1594 except that the last byte is not a newline. */ 1595 size_t readsize = (avail - 1) / (line_bytes + 1); 1596 size_t bytes_read = fread (ptr, 1, readsize, fp); 1597 char *ptrlim = ptr + bytes_read; 1598 char *p; 1599 avail -= bytes_read; 1600 1601 if (bytes_read != readsize) 1602 { 1603 if (ferror (fp)) 1604 die (_("read failed"), file); 1605 if (feof (fp)) 1606 { 1607 buf->eof = true; 1608 if (buf->buf == ptrlim) 1609 return false; 1610 if (ptrlim[-1] != eol) 1611 *ptrlim++ = eol; 1612 } 1613 } 1614 1615 /* Find and record each line in the just-read input. */ 1616 while ((p = memchr (ptr, eol, ptrlim - ptr))) 1617 { 1618 ptr = p + 1; 1619 line--; 1620 line->text = line_start; 1621 line->length = ptr - line_start; 1622 mergesize = MAX (mergesize, line->length); 1623 avail -= line_bytes; 1624 1625 if (key) 1626 { 1627 /* Precompute the position of the first key for 1628 efficiency. */ 1629 line->keylim = (key->eword == SIZE_MAX 1630 ? p 1631 : limfield (line, key)); 1632 1633 if (key->sword != SIZE_MAX) 1634 line->keybeg = begfield (line, key); 1635 else 1636 { 1637 if (key->skipsblanks) 1638 while (blanks[to_uchar (*line_start)]) 1639 line_start++; 1640 line->keybeg = line_start; 1641 } 1642 } 1643 1644 line_start = ptr; 1645 } 1646 1647 ptr = ptrlim; 1648 if (buf->eof) 1649 break; 1650 } 1651 1652 buf->used = ptr - buf->buf; 1653 buf->nlines = buffer_linelim (buf) - line; 1654 if (buf->nlines != 0) 1655 { 1656 buf->left = ptr - line_start; 1657 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE; 1658 return true; 1659 } 1660 1661 { 1662 /* The current input line is too long to fit in the buffer. 1663 Double the buffer size and try again, keeping it properly 1664 aligned. */ 1665 size_t line_alloc = buf->alloc / sizeof (struct line); 1666 buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line)); 1667 buf->alloc = line_alloc * sizeof (struct line); 1668 } 1669 } 1670} 1671 1672/* Compare strings A and B as numbers without explicitly converting them to 1673 machine numbers. Comparatively slow for short strings, but asymptotically 1674 hideously fast. */ 1675 1676static int 1677numcompare (const char *a, const char *b) 1678{ 1679 while (blanks[to_uchar (*a)]) 1680 a++; 1681 while (blanks[to_uchar (*b)]) 1682 b++; 1683 1684 return strnumcmp (a, b, decimal_point, thousands_sep); 1685} 1686 1687/* Exit with an error if a mixture of SI and IEC units detected. */ 1688 1689static void 1690check_mixed_SI_IEC (char prefix, struct keyfield *key) 1691{ 1692 int si_present = prefix == 'i'; 1693 if (key->si_present != -1 && si_present != key->si_present) 1694 error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units")); 1695 key->si_present = si_present; 1696} 1697 1698/* Return an integer which represents the order of magnitude of 1699 the unit following the number. NUMBER can contain thousands separators 1700 or a decimal point, but not have preceeding blanks. 1701 Negative numbers return a negative unit order. */ 1702 1703static int 1704find_unit_order (const char *number, struct keyfield *key) 1705{ 1706 static const char orders [UCHAR_LIM] = 1707 { 1708#if SOME_DAY_WE_WILL_REQUIRE_C99 1709 ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8, 1710 ['k']=1, 1711#else 1712 /* Generate the following table with this command: 1713 perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8); 1714 foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\ 1715 |fmt */ 1716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1718 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 1719 0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0, 1720 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1722 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1723 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1724 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1725 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1726 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 1727#endif 1728 }; 1729 1730 const unsigned char *p = number; 1731 1732 int sign = 1; 1733 1734 if (*p == '-') 1735 { 1736 sign = -1; 1737 p++; 1738 } 1739 1740 /* Scan to end of number. 1741 Decimals or separators not followed by digits stop the scan. 1742 Numbers ending in decimals or separators are thus considered 1743 to be lacking in units. 1744 FIXME: add support for multibyte thousands_sep and decimal_point. */ 1745 1746 while (ISDIGIT (*p)) 1747 { 1748 p++; 1749 1750 if (*p == decimal_point && ISDIGIT (*(p + 1))) 1751 p += 2; 1752 else if (*p == thousands_sep && ISDIGIT (*(p + 1))) 1753 p += 2; 1754 } 1755 1756 { 1757 int order = orders[*p]; 1758 1759 /* For valid units check for MiB vs MB etc. */ 1760 if (order) 1761 check_mixed_SI_IEC (*(p + 1), key); 1762 1763 return sign * order; 1764 } 1765} 1766 1767/* Compare numbers ending in units with SI xor IEC prefixes 1768 <none/unknown> < K/k < M < G < T < P < E < Z < Y 1769 Assume that numbers are properly abbreviated. 1770 i.e. input will never have both 6000K and 5M. */ 1771 1772static int 1773human_numcompare (const char *a, const char *b, struct keyfield *key) 1774{ 1775 while (blanks[to_uchar (*a)]) 1776 a++; 1777 while (blanks[to_uchar (*b)]) 1778 b++; 1779 1780 { 1781 int order_a = find_unit_order (a, key); 1782 int order_b = find_unit_order (b, key); 1783 1784 return (order_a > order_b ? 1 1785 : order_a < order_b ? -1 1786 : strnumcmp (a, b, decimal_point, thousands_sep)); 1787 } 1788} 1789 1790static int 1791general_numcompare (const char *sa, const char *sb) 1792{ 1793 /* FIXME: add option to warn about failed conversions. */ 1794 /* FIXME: maybe add option to try expensive FP conversion 1795 only if A and B can't be compared more cheaply/accurately. */ 1796 1797 char *ea; 1798 char *eb; 1799 double a = strtod (sa, &ea); 1800 double b = strtod (sb, &eb); 1801 1802 /* Put conversion errors at the start of the collating sequence. */ 1803 if (sa == ea) 1804 return sb == eb ? 0 : -1; 1805 if (sb == eb) 1806 return 1; 1807 1808 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after 1809 conversion errors but before numbers; sort them by internal 1810 bit-pattern, for lack of a more portable alternative. */ 1811 return (a < b ? -1 1812 : a > b ? 1 1813 : a == b ? 0 1814 : b == b ? -1 1815 : a == a ? 1 1816 : memcmp ((char *) &a, (char *) &b, sizeof a)); 1817} 1818 1819/* Return an integer in 1..12 of the month name MONTH with length LEN. 1820 Return 0 if the name in S is not recognized. */ 1821 1822static int 1823getmonth (char const *month, size_t len) 1824{ 1825 size_t lo = 0; 1826 size_t hi = MONTHS_PER_YEAR; 1827 char const *monthlim = month + len; 1828 1829 for (;;) 1830 { 1831 if (month == monthlim) 1832 return 0; 1833 if (!blanks[to_uchar (*month)]) 1834 break; 1835 ++month; 1836 } 1837 1838 do 1839 { 1840 size_t ix = (lo + hi) / 2; 1841 char const *m = month; 1842 char const *n = monthtab[ix].name; 1843 1844 for (;; m++, n++) 1845 { 1846 if (!*n) 1847 return monthtab[ix].val; 1848 if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n)) 1849 { 1850 hi = ix; 1851 break; 1852 } 1853 else if (fold_toupper[to_uchar (*m)] > to_uchar (*n)) 1854 { 1855 lo = ix + 1; 1856 break; 1857 } 1858 } 1859 } 1860 while (lo < hi); 1861 1862 return 0; 1863} 1864 1865/* A source of random data. */ 1866static struct randread_source *randread_source; 1867 1868/* Return the Ith randomly-generated state. The caller must invoke 1869 random_state (H) for all H less than I before invoking random_state 1870 (I). */ 1871 1872static struct md5_ctx 1873random_state (size_t i) 1874{ 1875 /* An array of states resulting from the random data, and counts of 1876 its used and allocated members. */ 1877 static struct md5_ctx *state; 1878 static size_t used; 1879 static size_t allocated; 1880 1881 struct md5_ctx *s = &state[i]; 1882 1883 if (used <= i) 1884 { 1885 unsigned char buf[MD5_DIGEST_SIZE]; 1886 1887 used++; 1888 1889 if (allocated <= i) 1890 { 1891 state = X2NREALLOC (state, &allocated); 1892 s = &state[i]; 1893 } 1894 1895 randread (randread_source, buf, sizeof buf); 1896 md5_init_ctx (s); 1897 md5_process_bytes (buf, sizeof buf, s); 1898 } 1899 1900 return *s; 1901} 1902 1903/* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB 1904 with length LENGTHB. Return negative if less, zero if equal, 1905 positive if greater. */ 1906 1907static int 1908cmp_hashes (char const *texta, size_t lena, 1909 char const *textb, size_t lenb) 1910{ 1911 /* Try random hashes until a pair of hashes disagree. But if the 1912 first pair of random hashes agree, check whether the keys are 1913 identical and if so report no difference. */ 1914 int diff; 1915 size_t i; 1916 for (i = 0; ; i++) 1917 { 1918 uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)]; 1919 struct md5_ctx s[2]; 1920 s[0] = s[1] = random_state (i); 1921 md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]); 1922 md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]); 1923 diff = memcmp (dig[0], dig[1], sizeof dig[0]); 1924 if (diff != 0) 1925 break; 1926 if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0) 1927 break; 1928 } 1929 1930 return diff; 1931} 1932 1933/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB) 1934 using one or more random hash functions. */ 1935 1936static int 1937compare_random (char *restrict texta, size_t lena, 1938 char *restrict textb, size_t lenb) 1939{ 1940 int diff; 1941 1942 if (! hard_LC_COLLATE) 1943 diff = cmp_hashes (texta, lena, textb, lenb); 1944 else 1945 { 1946 /* Transform the text into the basis of comparison, so that byte 1947 strings that would otherwise considered to be equal are 1948 considered equal here even if their bytes differ. */ 1949 1950 char *buf = NULL; 1951 char stackbuf[4000]; 1952 size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena); 1953 bool a_fits = tlena <= sizeof stackbuf; 1954 size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL), 1955 (a_fits ? sizeof stackbuf - tlena : 0), 1956 textb, lenb); 1957 1958 if (a_fits && tlena + tlenb <= sizeof stackbuf) 1959 buf = stackbuf; 1960 else 1961 { 1962 /* Adding 1 to the buffer size lets xmemxfrm run a bit 1963 faster by avoiding the need for an extra buffer copy. */ 1964 buf = xmalloc (tlena + tlenb + 1); 1965 xmemxfrm (buf, tlena + 1, texta, lena); 1966 xmemxfrm (buf + tlena, tlenb + 1, textb, lenb); 1967 } 1968 1969 diff = cmp_hashes (buf, tlena, buf + tlena, tlenb); 1970 1971 if (buf != stackbuf) 1972 free (buf); 1973 } 1974 1975 return diff; 1976} 1977 1978/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB) 1979 using filevercmp. See lib/filevercmp.h for function description. */ 1980 1981static int 1982compare_version (char *restrict texta, size_t lena, 1983 char *restrict textb, size_t lenb) 1984{ 1985 int diff; 1986 1987 /* It is necessary to save the character after the end of the field. 1988 "filevercmp" works with NUL terminated strings. Our blocks of 1989 text are not necessarily terminated with a NUL byte. */ 1990 char sv_a = texta[lena]; 1991 char sv_b = textb[lenb]; 1992 1993 texta[lena] = '\0'; 1994 textb[lenb] = '\0'; 1995 1996 diff = filevercmp (texta, textb); 1997 1998 texta[lena] = sv_a; 1999 textb[lenb] = sv_b; 2000 2001 return diff; 2002} 2003 2004/* Compare two lines A and B trying every key in sequence until there 2005 are no more keys or a difference is found. */ 2006 2007static int 2008keycompare (const struct line *a, const struct line *b) 2009{ 2010 struct keyfield *key = keylist; 2011 2012 /* For the first iteration only, the key positions have been 2013 precomputed for us. */ 2014 char *texta = a->keybeg; 2015 char *textb = b->keybeg; 2016 char *lima = a->keylim; 2017 char *limb = b->keylim; 2018 2019 int diff; 2020 2021 for (;;) 2022 { 2023 char const *translate = key->translate; 2024 bool const *ignore = key->ignore; 2025 2026 /* Treat field ends before field starts as empty fields. */ 2027 lima = MAX (texta, lima); 2028 limb = MAX (textb, limb); 2029 2030 { 2031 /* Find the lengths. */ 2032 size_t lena = lima - texta; 2033 size_t lenb = limb - textb; 2034 2035 /* Actually compare the fields. */ 2036 2037 if (key->random) 2038 diff = compare_random (texta, lena, textb, lenb); 2039 else if (key->numeric || key->general_numeric || key->human_numeric) 2040 { 2041 char savea = *lima, saveb = *limb; 2042 2043 *lima = *limb = '\0'; 2044 diff = (key->numeric ? numcompare (texta, textb) 2045 : key->general_numeric ? general_numcompare (texta, textb) 2046 : human_numcompare (texta, textb, key)); 2047 *lima = savea, *limb = saveb; 2048 } 2049 else if (key->version) 2050 diff = compare_version (texta, lena, textb, lenb); 2051 else if (key->month) 2052 diff = getmonth (texta, lena) - getmonth (textb, lenb); 2053 /* Sorting like this may become slow, so in a simple locale the user 2054 can select a faster sort that is similar to ascii sort. */ 2055 else if (hard_LC_COLLATE) 2056 { 2057 if (ignore || translate) 2058 { 2059 char buf[4000]; 2060 size_t size = lena + 1 + lenb + 1; 2061 char *copy_a = (size <= sizeof buf ? buf : xmalloc (size)); 2062 char *copy_b = copy_a + lena + 1; 2063 size_t new_len_a, new_len_b, i; 2064 2065 /* Ignore and/or translate chars before comparing. */ 2066 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++) 2067 { 2068 if (i < lena) 2069 { 2070 copy_a[new_len_a] = (translate 2071 ? translate[to_uchar (texta[i])] 2072 : texta[i]); 2073 if (!ignore || !ignore[to_uchar (texta[i])]) 2074 ++new_len_a; 2075 } 2076 if (i < lenb) 2077 { 2078 copy_b[new_len_b] = (translate 2079 ? translate[to_uchar (textb[i])] 2080 : textb [i]); 2081 if (!ignore || !ignore[to_uchar (textb[i])]) 2082 ++new_len_b; 2083 } 2084 } 2085 2086 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b); 2087 2088 if (sizeof buf < size) 2089 free (copy_a); 2090 } 2091 else if (lena == 0) 2092 diff = - NONZERO (lenb); 2093 else if (lenb == 0) 2094 goto greater; 2095 else 2096 diff = xmemcoll (texta, lena, textb, lenb); 2097 } 2098 else if (ignore) 2099 { 2100#define CMP_WITH_IGNORE(A, B) \ 2101 do \ 2102 { \ 2103 for (;;) \ 2104 { \ 2105 while (texta < lima && ignore[to_uchar (*texta)]) \ 2106 ++texta; \ 2107 while (textb < limb && ignore[to_uchar (*textb)]) \ 2108 ++textb; \ 2109 if (! (texta < lima && textb < limb)) \ 2110 break; \ 2111 diff = to_uchar (A) - to_uchar (B); \ 2112 if (diff) \ 2113 goto not_equal; \ 2114 ++texta; \ 2115 ++textb; \ 2116 } \ 2117 \ 2118 diff = (texta < lima) - (textb < limb); \ 2119 } \ 2120 while (0) 2121 2122 if (translate) 2123 CMP_WITH_IGNORE (translate[to_uchar (*texta)], 2124 translate[to_uchar (*textb)]); 2125 else 2126 CMP_WITH_IGNORE (*texta, *textb); 2127 } 2128 else if (lena == 0) 2129 diff = - NONZERO (lenb); 2130 else if (lenb == 0) 2131 goto greater; 2132 else 2133 { 2134 if (translate) 2135 { 2136 while (texta < lima && textb < limb) 2137 { 2138 diff = (to_uchar (translate[to_uchar (*texta++)]) 2139 - to_uchar (translate[to_uchar (*textb++)])); 2140 if (diff) 2141 goto not_equal; 2142 } 2143 } 2144 else 2145 { 2146 diff = memcmp (texta, textb, MIN (lena, lenb)); 2147 if (diff) 2148 goto not_equal; 2149 } 2150 diff = lena < lenb ? -1 : lena != lenb; 2151 } 2152 2153 if (diff) 2154 goto not_equal; 2155 2156 key = key->next; 2157 if (! key) 2158 break; 2159 2160 /* Find the beginning and limit of the next field. */ 2161 if (key->eword != SIZE_MAX) 2162 lima = limfield (a, key), limb = limfield (b, key); 2163 else 2164 lima = a->text + a->length - 1, limb = b->text + b->length - 1; 2165 2166 if (key->sword != SIZE_MAX) 2167 texta = begfield (a, key), textb = begfield (b, key); 2168 else 2169 { 2170 texta = a->text, textb = b->text; 2171 if (key->skipsblanks) 2172 { 2173 while (texta < lima && blanks[to_uchar (*texta)]) 2174 ++texta; 2175 while (textb < limb && blanks[to_uchar (*textb)]) 2176 ++textb; 2177 } 2178 } 2179 } 2180 } 2181 2182 return 0; 2183 2184 greater: 2185 diff = 1; 2186 not_equal: 2187 return key->reverse ? -diff : diff; 2188} 2189 2190/* Compare two lines A and B, returning negative, zero, or positive 2191 depending on whether A compares less than, equal to, or greater than B. */ 2192 2193static int 2194compare (const struct line *a, const struct line *b) 2195{ 2196 int diff; 2197 size_t alen, blen; 2198 2199 /* First try to compare on the specified keys (if any). 2200 The only two cases with no key at all are unadorned sort, 2201 and unadorned sort -r. */ 2202 if (keylist) 2203 { 2204 diff = keycompare (a, b); 2205 if (diff || unique || stable) 2206 return diff; 2207 } 2208 2209 /* If the keys all compare equal (or no keys were specified) 2210 fall through to the default comparison. */ 2211 alen = a->length - 1, blen = b->length - 1; 2212 2213 if (alen == 0) 2214 diff = - NONZERO (blen); 2215 else if (blen == 0) 2216 diff = 1; 2217 else if (hard_LC_COLLATE) 2218 diff = xmemcoll (a->text, alen, b->text, blen); 2219 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen)))) 2220 diff = alen < blen ? -1 : alen != blen; 2221 2222 return reverse ? -diff : diff; 2223} 2224 2225/* Check that the lines read from FILE_NAME come in order. Return 2226 true if they are in order. If CHECKONLY == 'c', also print a 2227 diagnostic (FILE_NAME, line number, contents of line) to stderr if 2228 they are not in order. */ 2229 2230static bool 2231check (char const *file_name, char checkonly) 2232{ 2233 FILE *fp = xfopen (file_name, "r"); 2234 struct buffer buf; /* Input buffer. */ 2235 struct line temp; /* Copy of previous line. */ 2236 size_t alloc = 0; 2237 uintmax_t line_number = 0; 2238 struct keyfield const *key = keylist; 2239 bool nonunique = ! unique; 2240 bool ordered = true; 2241 2242 initbuf (&buf, sizeof (struct line), 2243 MAX (merge_buffer_size, sort_size)); 2244 temp.text = NULL; 2245 2246 while (fillbuf (&buf, fp, file_name)) 2247 { 2248 struct line const *line = buffer_linelim (&buf); 2249 struct line const *linebase = line - buf.nlines; 2250 2251 /* Make sure the line saved from the old buffer contents is 2252 less than or equal to the first line of the new buffer. */ 2253 if (alloc && nonunique <= compare (&temp, line - 1)) 2254 { 2255 found_disorder: 2256 { 2257 if (checkonly == 'c') 2258 { 2259 struct line const *disorder_line = line - 1; 2260 uintmax_t disorder_line_number = 2261 buffer_linelim (&buf) - disorder_line + line_number; 2262 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)]; 2263 fprintf (stderr, _("%s: %s:%s: disorder: "), 2264 program_name, file_name, 2265 umaxtostr (disorder_line_number, hr_buf)); 2266 write_bytes (disorder_line->text, disorder_line->length, 2267 stderr, _("standard error")); 2268 } 2269 2270 ordered = false; 2271 break; 2272 } 2273 } 2274 2275 /* Compare each line in the buffer with its successor. */ 2276 while (linebase < --line) 2277 if (nonunique <= compare (line, line - 1)) 2278 goto found_disorder; 2279 2280 line_number += buf.nlines; 2281 2282 /* Save the last line of the buffer. */ 2283 if (alloc < line->length) 2284 { 2285 do 2286 { 2287 alloc *= 2; 2288 if (! alloc) 2289 { 2290 alloc = line->length; 2291 break; 2292 } 2293 } 2294 while (alloc < line->length); 2295 2296 temp.text = xrealloc (temp.text, alloc); 2297 } 2298 memcpy (temp.text, line->text, line->length); 2299 temp.length = line->length; 2300 if (key) 2301 { 2302 temp.keybeg = temp.text + (line->keybeg - line->text); 2303 temp.keylim = temp.text + (line->keylim - line->text); 2304 } 2305 } 2306 2307 xfclose (fp, file_name); 2308 free (buf.buf); 2309 free (temp.text); 2310 return ordered; 2311} 2312 2313/* Open FILES (there are NFILES of them) and store the resulting array 2314 of stream pointers into (*PFPS). Allocate the array. Return the 2315 number of successfully opened files, setting errno if this value is 2316 less than NFILES. */ 2317 2318static size_t 2319open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps) 2320{ 2321 FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps); 2322 int i; 2323 2324 /* Open as many input files as we can. */ 2325 for (i = 0; i < nfiles; i++) 2326 { 2327 fps[i] = (files[i].pid 2328 ? open_temp (files[i].name, files[i].pid) 2329 : stream_open (files[i].name, "r")); 2330 if (!fps[i]) 2331 break; 2332 } 2333 2334 return i; 2335} 2336 2337/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary 2338 files (all of which are at the start of the FILES array), and 2339 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE. 2340 FPS is the vector of open stream corresponding to the files. 2341 Close input and output streams before returning. 2342 OUTPUT_FILE gives the name of the output file. If it is NULL, 2343 the output file is standard output. */ 2344 2345static void 2346mergefps (struct sortfile *files, size_t ntemps, size_t nfiles, 2347 FILE *ofp, char const *output_file, FILE **fps) 2348{ 2349 struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer); 2350 /* Input buffers for each file. */ 2351 struct line saved; /* Saved line storage for unique check. */ 2352 struct line const *savedline = NULL; 2353 /* &saved if there is a saved line. */ 2354 size_t savealloc = 0; /* Size allocated for the saved line. */ 2355 struct line const **cur = xnmalloc (nfiles, sizeof *cur); 2356 /* Current line in each line table. */ 2357 struct line const **base = xnmalloc (nfiles, sizeof *base); 2358 /* Base of each line table. */ 2359 size_t *ord = xnmalloc (nfiles, sizeof *ord); 2360 /* Table representing a permutation of fps, 2361 such that cur[ord[0]] is the smallest line 2362 and will be next output. */ 2363 size_t i; 2364 size_t j; 2365 size_t t; 2366 struct keyfield const *key = keylist; 2367 saved.text = NULL; 2368 2369 /* Read initial lines from each input file. */ 2370 for (i = 0; i < nfiles; ) 2371 { 2372 initbuf (&buffer[i], sizeof (struct line), 2373 MAX (merge_buffer_size, sort_size / nfiles)); 2374 if (fillbuf (&buffer[i], fps[i], files[i].name)) 2375 { 2376 struct line const *linelim = buffer_linelim (&buffer[i]); 2377 cur[i] = linelim - 1; 2378 base[i] = linelim - buffer[i].nlines; 2379 i++; 2380 } 2381 else 2382 { 2383 /* fps[i] is empty; eliminate it from future consideration. */ 2384 xfclose (fps[i], files[i].name); 2385 if (i < ntemps) 2386 { 2387 ntemps--; 2388 zaptemp (files[i].name); 2389 } 2390 free (buffer[i].buf); 2391 --nfiles; 2392 for (j = i; j < nfiles; ++j) 2393 { 2394 files[j] = files[j + 1]; 2395 fps[j] = fps[j + 1]; 2396 } 2397 } 2398 } 2399 2400 /* Set up the ord table according to comparisons among input lines. 2401 Since this only reorders two items if one is strictly greater than 2402 the other, it is stable. */ 2403 for (i = 0; i < nfiles; ++i) 2404 ord[i] = i; 2405 for (i = 1; i < nfiles; ++i) 2406 if (0 < compare (cur[ord[i - 1]], cur[ord[i]])) 2407 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; 2408 2409 /* Repeatedly output the smallest line until no input remains. */ 2410 while (nfiles) 2411 { 2412 struct line const *smallest = cur[ord[0]]; 2413 2414 /* If uniquified output is turned on, output only the first of 2415 an identical series of lines. */ 2416 if (unique) 2417 { 2418 if (savedline && compare (savedline, smallest)) 2419 { 2420 savedline = NULL; 2421 write_bytes (saved.text, saved.length, ofp, output_file); 2422 } 2423 if (!savedline) 2424 { 2425 savedline = &saved; 2426 if (savealloc < smallest->length) 2427 { 2428 do 2429 if (! savealloc) 2430 { 2431 savealloc = smallest->length; 2432 break; 2433 } 2434 while ((savealloc *= 2) < smallest->length); 2435 2436 saved.text = xrealloc (saved.text, savealloc); 2437 } 2438 saved.length = smallest->length; 2439 memcpy (saved.text, smallest->text, saved.length); 2440 if (key) 2441 { 2442 saved.keybeg = 2443 saved.text + (smallest->keybeg - smallest->text); 2444 saved.keylim = 2445 saved.text + (smallest->keylim - smallest->text); 2446 } 2447 } 2448 } 2449 else 2450 write_bytes (smallest->text, smallest->length, ofp, output_file); 2451 2452 /* Check if we need to read more lines into core. */ 2453 if (base[ord[0]] < smallest) 2454 cur[ord[0]] = smallest - 1; 2455 else 2456 { 2457 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name)) 2458 { 2459 struct line const *linelim = buffer_linelim (&buffer[ord[0]]); 2460 cur[ord[0]] = linelim - 1; 2461 base[ord[0]] = linelim - buffer[ord[0]].nlines; 2462 } 2463 else 2464 { 2465 /* We reached EOF on fps[ord[0]]. */ 2466 for (i = 1; i < nfiles; ++i) 2467 if (ord[i] > ord[0]) 2468 --ord[i]; 2469 --nfiles; 2470 xfclose (fps[ord[0]], files[ord[0]].name); 2471 if (ord[0] < ntemps) 2472 { 2473 ntemps--; 2474 zaptemp (files[ord[0]].name); 2475 } 2476 free (buffer[ord[0]].buf); 2477 for (i = ord[0]; i < nfiles; ++i) 2478 { 2479 fps[i] = fps[i + 1]; 2480 files[i] = files[i + 1]; 2481 buffer[i] = buffer[i + 1]; 2482 cur[i] = cur[i + 1]; 2483 base[i] = base[i + 1]; 2484 } 2485 for (i = 0; i < nfiles; ++i) 2486 ord[i] = ord[i + 1]; 2487 continue; 2488 } 2489 } 2490 2491 /* The new line just read in may be larger than other lines 2492 already in main memory; push it back in the queue until we 2493 encounter a line larger than it. Optimize for the common 2494 case where the new line is smallest. */ 2495 { 2496 size_t lo = 1; 2497 size_t hi = nfiles; 2498 size_t probe = lo; 2499 size_t ord0 = ord[0]; 2500 size_t count_of_smaller_lines; 2501 2502 while (lo < hi) 2503 { 2504 int cmp = compare (cur[ord0], cur[ord[probe]]); 2505 if (cmp < 0 || (cmp == 0 && ord0 < ord[probe])) 2506 hi = probe; 2507 else 2508 lo = probe + 1; 2509 probe = (lo + hi) / 2; 2510 } 2511 2512 count_of_smaller_lines = lo - 1; 2513 for (j = 0; j < count_of_smaller_lines; j++) 2514 ord[j] = ord[j + 1]; 2515 ord[count_of_smaller_lines] = ord0; 2516 } 2517 2518 /* Free up some resources every once in a while. */ 2519 if (MAX_PROCS_BEFORE_REAP < nprocs) 2520 reap_some (); 2521 } 2522 2523 if (unique && savedline) 2524 { 2525 write_bytes (saved.text, saved.length, ofp, output_file); 2526 free (saved.text); 2527 } 2528 2529 xfclose (ofp, output_file); 2530 free(fps); 2531 free(buffer); 2532 free(ord); 2533 free(base); 2534 free(cur); 2535} 2536 2537/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary 2538 files (all of which are at the start of the FILES array), and 2539 NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE. 2540 Close input and output files before returning. 2541 OUTPUT_FILE gives the name of the output file. 2542 2543 Return the number of files successfully merged. This number can be 2544 less than NFILES if we ran low on file descriptors, but in this 2545 case it is never less than 2. */ 2546 2547static size_t 2548mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles, 2549 FILE *ofp, char const *output_file) 2550{ 2551 FILE **fps; 2552 size_t nopened = open_input_files (files, nfiles, &fps); 2553 if (nopened < nfiles && nopened < 2) 2554 die (_("open failed"), files[nopened].name); 2555 mergefps (files, ntemps, nopened, ofp, output_file, fps); 2556 return nopened; 2557} 2558 2559/* Merge into T the two sorted arrays of lines LO (with NLO members) 2560 and HI (with NHI members). T, LO, and HI point just past their 2561 respective arrays, and the arrays are in reverse order. NLO and 2562 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */ 2563 2564static inline void 2565mergelines (struct line *t, 2566 struct line const *lo, size_t nlo, 2567 struct line const *hi, size_t nhi) 2568{ 2569 for (;;) 2570 if (compare (lo - 1, hi - 1) <= 0) 2571 { 2572 *--t = *--lo; 2573 if (! --nlo) 2574 { 2575 /* HI - NHI equalled T - (NLO + NHI) when this function 2576 began. Therefore HI must equal T now, and there is no 2577 need to copy from HI to T. */ 2578 return; 2579 } 2580 } 2581 else 2582 { 2583 *--t = *--hi; 2584 if (! --nhi) 2585 { 2586 do 2587 *--t = *--lo; 2588 while (--nlo); 2589 2590 return; 2591 } 2592 } 2593} 2594 2595/* Sort the array LINES with NLINES members, using TEMP for temporary space. 2596 NLINES must be at least 2. 2597 The input and output arrays are in reverse order, and LINES and 2598 TEMP point just past the end of their respective arrays. 2599 2600 Use a recursive divide-and-conquer algorithm, in the style 2601 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use 2602 the optimization suggested by exercise 5.2.4-10; this requires room 2603 for only 1.5*N lines, rather than the usual 2*N lines. Knuth 2604 writes that this memory optimization was originally published by 2605 D. A. Bell, Comp J. 1 (1958), 75. */ 2606 2607static void 2608sortlines (struct line *lines, size_t nlines, struct line *temp) 2609{ 2610 if (nlines == 2) 2611 { 2612 if (0 < compare (&lines[-1], &lines[-2])) 2613 { 2614 struct line tmp = lines[-1]; 2615 lines[-1] = lines[-2]; 2616 lines[-2] = tmp; 2617 } 2618 } 2619 else 2620 { 2621 size_t nlo = nlines / 2; 2622 size_t nhi = nlines - nlo; 2623 struct line *lo = lines; 2624 struct line *hi = lines - nlo; 2625 struct line *sorted_lo = temp; 2626 2627 sortlines (hi, nhi, temp); 2628 if (1 < nlo) 2629 sortlines_temp (lo, nlo, sorted_lo); 2630 else 2631 sorted_lo[-1] = lo[-1]; 2632 2633 mergelines (lines, sorted_lo, nlo, hi, nhi); 2634 } 2635} 2636 2637/* Like sortlines (LINES, NLINES, TEMP), except output into TEMP 2638 rather than sorting in place. */ 2639 2640static void 2641sortlines_temp (struct line *lines, size_t nlines, struct line *temp) 2642{ 2643 if (nlines == 2) 2644 { 2645 /* Declare `swap' as int, not bool, to work around a bug 2646 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html> 2647 in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */ 2648 int swap = (0 < compare (&lines[-1], &lines[-2])); 2649 temp[-1] = lines[-1 - swap]; 2650 temp[-2] = lines[-2 + swap]; 2651 } 2652 else 2653 { 2654 size_t nlo = nlines / 2; 2655 size_t nhi = nlines - nlo; 2656 struct line *lo = lines; 2657 struct line *hi = lines - nlo; 2658 struct line *sorted_hi = temp - nlo; 2659 2660 sortlines_temp (hi, nhi, sorted_hi); 2661 if (1 < nlo) 2662 sortlines (lo, nlo, temp); 2663 2664 mergelines (temp, lo, nlo, sorted_hi, nhi); 2665 } 2666} 2667 2668/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is 2669 the same as OUTFILE. If found, merge the found instances (and perhaps 2670 some other files) into a temporary file so that it can in turn be 2671 merged into OUTFILE without destroying OUTFILE before it is completely 2672 read. Return the new value of NFILES, which differs from the old if 2673 some merging occurred. 2674 2675 This test ensures that an otherwise-erroneous use like 2676 "sort -m -o FILE ... FILE ..." copies FILE before writing to it. 2677 It's not clear that POSIX requires this nicety. 2678 Detect common error cases, but don't try to catch obscure cases like 2679 "cat ... FILE ... | sort -m -o FILE" 2680 where traditional "sort" doesn't copy the input and where 2681 people should know that they're getting into trouble anyway. 2682 Catching these obscure cases would slow down performance in 2683 common cases. */ 2684 2685static size_t 2686avoid_trashing_input (struct sortfile *files, size_t ntemps, 2687 size_t nfiles, char const *outfile) 2688{ 2689 size_t i; 2690 bool got_outstat = false; 2691 struct stat outstat; 2692 2693 for (i = ntemps; i < nfiles; i++) 2694 { 2695 bool is_stdin = STREQ (files[i].name, "-"); 2696 bool same; 2697 struct stat instat; 2698 2699 if (outfile && STREQ (outfile, files[i].name) && !is_stdin) 2700 same = true; 2701 else 2702 { 2703 if (! got_outstat) 2704 { 2705 if ((outfile 2706 ? stat (outfile, &outstat) 2707 : fstat (STDOUT_FILENO, &outstat)) 2708 != 0) 2709 break; 2710 got_outstat = true; 2711 } 2712 2713 same = (((is_stdin 2714 ? fstat (STDIN_FILENO, &instat) 2715 : stat (files[i].name, &instat)) 2716 == 0) 2717 && SAME_INODE (instat, outstat)); 2718 } 2719 2720 if (same) 2721 { 2722 FILE *tftp; 2723 pid_t pid; 2724 char *temp = create_temp (&tftp, &pid); 2725 size_t num_merged = 0; 2726 do 2727 { 2728 num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp); 2729 files[i].name = temp; 2730 files[i].pid = pid; 2731 2732 if (i + num_merged < nfiles) 2733 memmove(&files[i + 1], &files[i + num_merged], 2734 num_merged * sizeof *files); 2735 ntemps += 1; 2736 nfiles -= num_merged - 1;; 2737 i += num_merged; 2738 } 2739 while (i < nfiles); 2740 } 2741 } 2742 2743 return nfiles; 2744} 2745 2746/* Merge the input FILES. NTEMPS is the number of files at the 2747 start of FILES that are temporary; it is zero at the top level. 2748 NFILES is the total number of files. Put the output in 2749 OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */ 2750 2751static void 2752merge (struct sortfile *files, size_t ntemps, size_t nfiles, 2753 char const *output_file) 2754{ 2755 while (nmerge < nfiles) 2756 { 2757 /* Number of input files processed so far. */ 2758 size_t in; 2759 2760 /* Number of output files generated so far. */ 2761 size_t out; 2762 2763 /* nfiles % NMERGE; this counts input files that are left over 2764 after all full-sized merges have been done. */ 2765 size_t remainder; 2766 2767 /* Number of easily-available slots at the next loop iteration. */ 2768 size_t cheap_slots; 2769 2770 /* Do as many NMERGE-size merges as possible. In the case that 2771 nmerge is bogus, increment by the maximum number of file 2772 descriptors allowed. */ 2773 for (out = in = 0; nmerge <= nfiles - in; out++) 2774 { 2775 FILE *tfp; 2776 pid_t pid; 2777 char *temp = create_temp (&tfp, &pid); 2778 size_t num_merged = mergefiles (&files[in], MIN (ntemps, nmerge), 2779 nmerge, tfp, temp); 2780 ntemps -= MIN (ntemps, num_merged); 2781 files[out].name = temp; 2782 files[out].pid = pid; 2783 in += num_merged; 2784 } 2785 2786 remainder = nfiles - in; 2787 cheap_slots = nmerge - out % nmerge; 2788 2789 if (cheap_slots < remainder) 2790 { 2791 /* So many files remain that they can't all be put into the last 2792 NMERGE-sized output window. Do one more merge. Merge as few 2793 files as possible, to avoid needless I/O. */ 2794 size_t nshortmerge = remainder - cheap_slots + 1; 2795 FILE *tfp; 2796 pid_t pid; 2797 char *temp = create_temp (&tfp, &pid); 2798 size_t num_merged = mergefiles (&files[in], MIN (ntemps, nshortmerge), 2799 nshortmerge, tfp, temp); 2800 ntemps -= MIN (ntemps, num_merged); 2801 files[out].name = temp; 2802 files[out++].pid = pid; 2803 in += num_merged; 2804 } 2805 2806 /* Put the remaining input files into the last NMERGE-sized output 2807 window, so they will be merged in the next pass. */ 2808 memmove(&files[out], &files[in], (nfiles - in) * sizeof *files); 2809 ntemps += out; 2810 nfiles -= in - out; 2811 } 2812 2813 nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file); 2814 2815 /* We aren't guaranteed that this final mergefiles will work, therefore we 2816 try to merge into the output, and then merge as much as we can into a 2817 temp file if we can't. Repeat. */ 2818 2819 for (;;) 2820 { 2821 /* Merge directly into the output file if possible. */ 2822 FILE **fps; 2823 size_t nopened = open_input_files (files, nfiles, &fps); 2824 2825 if (nopened == nfiles) 2826 { 2827 FILE *ofp = stream_open (output_file, "w"); 2828 if (ofp) 2829 { 2830 mergefps (files, ntemps, nfiles, ofp, output_file, fps); 2831 break; 2832 } 2833 if (errno != EMFILE || nopened <= 2) 2834 die (_("open failed"), output_file); 2835 } 2836 else if (nopened <= 2) 2837 die (_("open failed"), files[nopened].name); 2838 2839 /* We ran out of file descriptors. Close one of the input 2840 files, to gain a file descriptor. Then create a temporary 2841 file with our spare file descriptor. Retry if that failed 2842 (e.g., some other process could open a file between the time 2843 we closed and tried to create). */ 2844 { 2845 FILE *tfp; 2846 pid_t pid; 2847 char *temp; 2848 do 2849 { 2850 nopened--; 2851 xfclose (fps[nopened], files[nopened].name); 2852 temp = maybe_create_temp (&tfp, &pid, ! (nopened <= 2)); 2853 } 2854 while (!temp); 2855 2856 /* Merge into the newly allocated temporary. */ 2857 mergefps (&files[0], MIN (ntemps, nopened), nopened, tfp, temp, fps); 2858 ntemps -= MIN (ntemps, nopened); 2859 files[0].name = temp; 2860 files[0].pid = pid; 2861 2862 memmove (&files[1], &files[nopened], (nfiles - nopened) * sizeof *files); 2863 ntemps++; 2864 nfiles -= nopened - 1; 2865 } 2866 } 2867} 2868 2869/* Sort NFILES FILES onto OUTPUT_FILE. */ 2870 2871static void 2872sort (char * const *files, size_t nfiles, char const *output_file) 2873{ 2874 struct buffer buf; 2875 size_t ntemps = 0; 2876 bool output_file_created = false; 2877 2878 buf.alloc = 0; 2879 2880 while (nfiles) 2881 { 2882 char const *temp_output; 2883 char const *file = *files; 2884 FILE *fp = xfopen (file, "r"); 2885 FILE *tfp; 2886 size_t bytes_per_line = (2 * sizeof (struct line) 2887 - sizeof (struct line) / 2); 2888 2889 if (! buf.alloc) 2890 initbuf (&buf, bytes_per_line, 2891 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line)); 2892 buf.eof = false; 2893 files++; 2894 nfiles--; 2895 2896 while (fillbuf (&buf, fp, file)) 2897 { 2898 struct line *line; 2899 struct line *linebase; 2900 2901 if (buf.eof && nfiles 2902 && (bytes_per_line + 1 2903 < (buf.alloc - buf.used - bytes_per_line * buf.nlines))) 2904 { 2905 /* End of file, but there is more input and buffer room. 2906 Concatenate the next input file; this is faster in 2907 the usual case. */ 2908 buf.left = buf.used; 2909 break; 2910 } 2911 2912 line = buffer_linelim (&buf); 2913 linebase = line - buf.nlines; 2914 if (1 < buf.nlines) 2915 sortlines (line, buf.nlines, linebase); 2916 if (buf.eof && !nfiles && !ntemps && !buf.left) 2917 { 2918 xfclose (fp, file); 2919 tfp = xfopen (output_file, "w"); 2920 temp_output = output_file; 2921 output_file_created = true; 2922 } 2923 else 2924 { 2925 ++ntemps; 2926 temp_output = create_temp (&tfp, NULL); 2927 } 2928 2929 do 2930 { 2931 line--; 2932 write_bytes (line->text, line->length, tfp, temp_output); 2933 if (unique) 2934 while (linebase < line && compare (line, line - 1) == 0) 2935 line--; 2936 } 2937 while (linebase < line); 2938 2939 xfclose (tfp, temp_output); 2940 2941 /* Free up some resources every once in a while. */ 2942 if (MAX_PROCS_BEFORE_REAP < nprocs) 2943 reap_some (); 2944 2945 if (output_file_created) 2946 goto finish; 2947 } 2948 xfclose (fp, file); 2949 } 2950 2951 finish: 2952 free (buf.buf); 2953 2954 if (! output_file_created) 2955 { 2956 size_t i; 2957 struct tempnode *node = temphead; 2958 struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles); 2959 for (i = 0; node; i++) 2960 { 2961 tempfiles[i].name = node->name; 2962 tempfiles[i].pid = node->pid; 2963 node = node->next; 2964 } 2965 merge (tempfiles, ntemps, ntemps, output_file); 2966 free (tempfiles); 2967 } 2968} 2969 2970/* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */ 2971 2972static void 2973insertkey (struct keyfield *key_arg) 2974{ 2975 struct keyfield **p; 2976 struct keyfield *key = xmemdup (key_arg, sizeof *key); 2977 2978 for (p = &keylist; *p; p = &(*p)->next) 2979 continue; 2980 *p = key; 2981 key->next = NULL; 2982} 2983 2984/* Report a bad field specification SPEC, with extra info MSGID. */ 2985 2986static void badfieldspec (char const *, char const *) 2987 ATTRIBUTE_NORETURN; 2988static void 2989badfieldspec (char const *spec, char const *msgid) 2990{ 2991 error (SORT_FAILURE, 0, _("%s: invalid field specification %s"), 2992 _(msgid), quote (spec)); 2993 abort (); 2994} 2995 2996/* Report incompatible options. */ 2997 2998static void incompatible_options (char const *) ATTRIBUTE_NORETURN; 2999static void 3000incompatible_options (char const *opts) 3001{ 3002 error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts); 3003 abort (); 3004} 3005 3006/* Check compatibility of ordering options. */ 3007 3008static void 3009check_ordering_compatibility (void) 3010{ 3011 struct keyfield const *key; 3012 3013 for (key = keylist; key; key = key->next) 3014 if ((1 < (key->random + key->numeric + key->general_numeric + key->month 3015 + key->version + !!key->ignore + key->human_numeric)) 3016 || (key->random && key->translate)) 3017 { 3018 /* The following is too big, but guaranteed to be "big enough". */ 3019 char opts[sizeof short_options]; 3020 char *p = opts; 3021 if (key->ignore == nondictionary) 3022 *p++ = 'd'; 3023 if (key->translate) 3024 *p++ = 'f'; 3025 if (key->general_numeric) 3026 *p++ = 'g'; 3027 if (key->human_numeric) 3028 *p++ = 'h'; 3029 if (key->ignore == nonprinting) 3030 *p++ = 'i'; 3031 if (key->month) 3032 *p++ = 'M'; 3033 if (key->numeric) 3034 *p++ = 'n'; 3035 if (key->version) 3036 *p++ = 'V'; 3037 if (key->random) 3038 *p++ = 'R'; 3039 *p = '\0'; 3040 incompatible_options (opts); 3041 } 3042} 3043 3044/* Parse the leading integer in STRING and store the resulting value 3045 (which must fit into size_t) into *VAL. Return the address of the 3046 suffix after the integer. If the value is too large, silently 3047 substitute SIZE_MAX. If MSGID is NULL, return NULL after 3048 failure; otherwise, report MSGID and exit on failure. */ 3049 3050static char const * 3051parse_field_count (char const *string, size_t *val, char const *msgid) 3052{ 3053 char *suffix; 3054 uintmax_t n; 3055 3056 switch (xstrtoumax (string, &suffix, 10, &n, "")) 3057 { 3058 case LONGINT_OK: 3059 case LONGINT_INVALID_SUFFIX_CHAR: 3060 *val = n; 3061 if (*val == n) 3062 break; 3063 /* Fall through. */ 3064 case LONGINT_OVERFLOW: 3065 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR: 3066 *val = SIZE_MAX; 3067 break; 3068 3069 case LONGINT_INVALID: 3070 if (msgid) 3071 error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"), 3072 _(msgid), quote (string)); 3073 return NULL; 3074 } 3075 3076 return suffix; 3077} 3078 3079/* Handle interrupts and hangups. */ 3080 3081static void 3082sighandler (int sig) 3083{ 3084 if (! SA_NOCLDSTOP) 3085 signal (sig, SIG_IGN); 3086 3087 cleanup (); 3088 3089 signal (sig, SIG_DFL); 3090 raise (sig); 3091} 3092 3093/* Set the ordering options for KEY specified in S. 3094 Return the address of the first character in S that 3095 is not a valid ordering option. 3096 BLANKTYPE is the kind of blanks that 'b' should skip. */ 3097 3098static char * 3099set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype) 3100{ 3101 while (*s) 3102 { 3103 switch (*s) 3104 { 3105 case 'b': 3106 if (blanktype == bl_start || blanktype == bl_both) 3107 key->skipsblanks = true; 3108 if (blanktype == bl_end || blanktype == bl_both) 3109 key->skipeblanks = true; 3110 break; 3111 case 'd': 3112 key->ignore = nondictionary; 3113 break; 3114 case 'f': 3115 key->translate = fold_toupper; 3116 break; 3117 case 'g': 3118 key->general_numeric = true; 3119 break; 3120 case 'h': 3121 key->human_numeric = true; 3122 break; 3123 case 'i': 3124 /* Option order should not matter, so don't let -i override 3125 -d. -d implies -i, but -i does not imply -d. */ 3126 if (! key->ignore) 3127 key->ignore = nonprinting; 3128 break; 3129 case 'M': 3130 key->month = true; 3131 break; 3132 case 'n': 3133 key->numeric = true; 3134 break; 3135 case 'R': 3136 key->random = true; 3137 break; 3138 case 'r': 3139 key->reverse = true; 3140 break; 3141 case 'V': 3142 key->version = true; 3143 break; 3144 default: 3145 return (char *) s; 3146 } 3147 ++s; 3148 } 3149 return (char *) s; 3150} 3151 3152static struct keyfield * 3153key_init (struct keyfield *key) 3154{ 3155 memset (key, 0, sizeof *key); 3156 key->eword = SIZE_MAX; 3157 key->si_present = -1; 3158 return key; 3159} 3160 3161int 3162main (int argc, char **argv) 3163{ 3164 struct keyfield *key; 3165 struct keyfield key_buf; 3166 struct keyfield gkey; 3167 char const *s; 3168 int c = 0; 3169 char checkonly = 0; 3170 bool mergeonly = false; 3171 char *random_source = NULL; 3172 bool need_random = false; 3173 size_t nfiles = 0; 3174 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); 3175 bool obsolete_usage = (posix2_version () < 200112); 3176 char **files; 3177 char *files_from = NULL; 3178 struct Tokens tok; 3179 char const *outfile = NULL; 3180 3181 initialize_main (&argc, &argv); 3182 set_program_name (argv[0]); 3183 setlocale (LC_ALL, ""); 3184 bindtextdomain (PACKAGE, LOCALEDIR); 3185 textdomain (PACKAGE); 3186 3187 initialize_exit_failure (SORT_FAILURE); 3188 3189 hard_LC_COLLATE = hard_locale (LC_COLLATE); 3190#if HAVE_NL_LANGINFO 3191 hard_LC_TIME = hard_locale (LC_TIME); 3192#endif 3193 3194 /* Get locale's representation of the decimal point. */ 3195 { 3196 struct lconv const *locale = localeconv (); 3197 3198 /* If the locale doesn't define a decimal point, or if the decimal 3199 point is multibyte, use the C locale's decimal point. FIXME: 3200 add support for multibyte decimal points. */ 3201 decimal_point = to_uchar (locale->decimal_point[0]); 3202 if (! decimal_point || locale->decimal_point[1]) 3203 decimal_point = '.'; 3204 3205 /* FIXME: add support for multibyte thousands separators. */ 3206 thousands_sep = to_uchar (*locale->thousands_sep); 3207 if (! thousands_sep || locale->thousands_sep[1]) 3208 thousands_sep = -1; 3209 } 3210 3211 have_read_stdin = false; 3212 inittables (); 3213 3214 { 3215 size_t i; 3216 static int const sig[] = 3217 { 3218 /* The usual suspects. */ 3219 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, 3220#ifdef SIGPOLL 3221 SIGPOLL, 3222#endif 3223#ifdef SIGPROF 3224 SIGPROF, 3225#endif 3226#ifdef SIGVTALRM 3227 SIGVTALRM, 3228#endif 3229#ifdef SIGXCPU 3230 SIGXCPU, 3231#endif 3232#ifdef SIGXFSZ 3233 SIGXFSZ, 3234#endif 3235 }; 3236 enum { nsigs = ARRAY_CARDINALITY (sig) }; 3237 3238#if SA_NOCLDSTOP 3239 struct sigaction act; 3240 3241 sigemptyset (&caught_signals); 3242 for (i = 0; i < nsigs; i++) 3243 { 3244 sigaction (sig[i], NULL, &act); 3245 if (act.sa_handler != SIG_IGN) 3246 sigaddset (&caught_signals, sig[i]); 3247 } 3248 3249 act.sa_handler = sighandler; 3250 act.sa_mask = caught_signals; 3251 act.sa_flags = 0; 3252 3253 for (i = 0; i < nsigs; i++) 3254 if (sigismember (&caught_signals, sig[i])) 3255 sigaction (sig[i], &act, NULL); 3256#else 3257 for (i = 0; i < nsigs; i++) 3258 if (signal (sig[i], SIG_IGN) != SIG_IGN) 3259 { 3260 signal (sig[i], sighandler); 3261 siginterrupt (sig[i], 1); 3262 } 3263#endif 3264 } 3265 signal (SIGCHLD, SIG_DFL); /* Don't inherit CHLD handling from parent. */ 3266 3267 /* The signal mask is known, so it is safe to invoke exit_cleanup. */ 3268 atexit (exit_cleanup); 3269 3270 gkey.sword = gkey.eword = SIZE_MAX; 3271 gkey.ignore = NULL; 3272 gkey.translate = NULL; 3273 gkey.numeric = gkey.general_numeric = gkey.human_numeric = false; 3274 gkey.si_present = -1; 3275 gkey.random = gkey.version = false; 3276 gkey.month = gkey.reverse = false; 3277 gkey.skipsblanks = gkey.skipeblanks = false; 3278 3279 files = xnmalloc (argc, sizeof *files); 3280 3281 for (;;) 3282 { 3283 /* Parse an operand as a file after "--" was seen; or if 3284 pedantic and a file was seen, unless the POSIX version 3285 predates 1003.1-2001 and -c was not seen and the operand is 3286 "-o FILE" or "-oFILE". */ 3287 int oi = -1; 3288 3289 if (c == -1 3290 || (posixly_correct && nfiles != 0 3291 && ! (obsolete_usage 3292 && ! checkonly 3293 && optind != argc 3294 && argv[optind][0] == '-' && argv[optind][1] == 'o' 3295 && (argv[optind][2] || optind + 1 != argc))) 3296 || ((c = getopt_long (argc, argv, short_options, 3297 long_options, &oi)) 3298 == -1)) 3299 { 3300 if (argc <= optind) 3301 break; 3302 files[nfiles++] = argv[optind++]; 3303 } 3304 else switch (c) 3305 { 3306 case 1: 3307 key = NULL; 3308 if (optarg[0] == '+') 3309 { 3310 bool minus_pos_usage = (optind != argc && argv[optind][0] == '-' 3311 && ISDIGIT (argv[optind][1])); 3312 obsolete_usage |= minus_pos_usage && !posixly_correct; 3313 if (obsolete_usage) 3314 { 3315 /* Treat +POS1 [-POS2] as a key if possible; but silently 3316 treat an operand as a file if it is not a valid +POS1. */ 3317 key = key_init (&key_buf); 3318 s = parse_field_count (optarg + 1, &key->sword, NULL); 3319 if (s && *s == '.') 3320 s = parse_field_count (s + 1, &key->schar, NULL); 3321 if (! (key->sword || key->schar)) 3322 key->sword = SIZE_MAX; 3323 if (! s || *set_ordering (s, key, bl_start)) 3324 key = NULL; 3325 else 3326 { 3327 if (minus_pos_usage) 3328 { 3329 char const *optarg1 = argv[optind++]; 3330 s = parse_field_count (optarg1 + 1, &key->eword, 3331 N_("invalid number after `-'")); 3332 if (*s == '.') 3333 s = parse_field_count (s + 1, &key->echar, 3334 N_("invalid number after `.'")); 3335 if (*set_ordering (s, key, bl_end)) 3336 badfieldspec (optarg1, 3337 N_("stray character in field spec")); 3338 } 3339 insertkey (key); 3340 } 3341 } 3342 } 3343 if (! key) 3344 files[nfiles++] = optarg; 3345 break; 3346 3347 case SORT_OPTION: 3348 c = XARGMATCH ("--sort", optarg, sort_args, sort_types); 3349 /* Fall through. */ 3350 case 'b': 3351 case 'd': 3352 case 'f': 3353 case 'g': 3354 case 'h': 3355 case 'i': 3356 case 'M': 3357 case 'n': 3358 case 'r': 3359 case 'R': 3360 case 'V': 3361 { 3362 char str[2]; 3363 str[0] = c; 3364 str[1] = '\0'; 3365 set_ordering (str, &gkey, bl_both); 3366 } 3367 break; 3368 3369 case CHECK_OPTION: 3370 c = (optarg 3371 ? XARGMATCH ("--check", optarg, check_args, check_types) 3372 : 'c'); 3373 /* Fall through. */ 3374 case 'c': 3375 case 'C': 3376 if (checkonly && checkonly != c) 3377 incompatible_options ("cC"); 3378 checkonly = c; 3379 break; 3380 3381 case COMPRESS_PROGRAM_OPTION: 3382 if (compress_program && !STREQ (compress_program, optarg)) 3383 error (SORT_FAILURE, 0, _("multiple compress programs specified")); 3384 compress_program = optarg; 3385 break; 3386 3387 case FILES0_FROM_OPTION: 3388 files_from = optarg; 3389 break; 3390 3391 case 'k': 3392 key = key_init (&key_buf); 3393 3394 /* Get POS1. */ 3395 s = parse_field_count (optarg, &key->sword, 3396 N_("invalid number at field start")); 3397 if (! key->sword--) 3398 { 3399 /* Provoke with `sort -k0' */ 3400 badfieldspec (optarg, N_("field number is zero")); 3401 } 3402 if (*s == '.') 3403 { 3404 s = parse_field_count (s + 1, &key->schar, 3405 N_("invalid number after `.'")); 3406 if (! key->schar--) 3407 { 3408 /* Provoke with `sort -k1.0' */ 3409 badfieldspec (optarg, N_("character offset is zero")); 3410 } 3411 } 3412 if (! (key->sword || key->schar)) 3413 key->sword = SIZE_MAX; 3414 s = set_ordering (s, key, bl_start); 3415 if (*s != ',') 3416 { 3417 key->eword = SIZE_MAX; 3418 key->echar = 0; 3419 } 3420 else 3421 { 3422 /* Get POS2. */ 3423 s = parse_field_count (s + 1, &key->eword, 3424 N_("invalid number after `,'")); 3425 if (! key->eword--) 3426 { 3427 /* Provoke with `sort -k1,0' */ 3428 badfieldspec (optarg, N_("field number is zero")); 3429 } 3430 if (*s == '.') 3431 { 3432 s = parse_field_count (s + 1, &key->echar, 3433 N_("invalid number after `.'")); 3434 } 3435 s = set_ordering (s, key, bl_end); 3436 } 3437 if (*s) 3438 badfieldspec (optarg, N_("stray character in field spec")); 3439 insertkey (key); 3440 break; 3441 3442 case 'm': 3443 mergeonly = true; 3444 break; 3445 3446 case NMERGE_OPTION: 3447 specify_nmerge (oi, c, optarg); 3448 break; 3449 3450 case 'o': 3451 if (outfile && !STREQ (outfile, optarg)) 3452 error (SORT_FAILURE, 0, _("multiple output files specified")); 3453 outfile = optarg; 3454 break; 3455 3456 case RANDOM_SOURCE_OPTION: 3457 if (random_source && !STREQ (random_source, optarg)) 3458 error (SORT_FAILURE, 0, _("multiple random sources specified")); 3459 random_source = optarg; 3460 break; 3461 3462 case 's': 3463 stable = true; 3464 break; 3465 3466 case 'S': 3467 specify_sort_size (oi, c, optarg); 3468 break; 3469 3470 case 't': 3471 { 3472 char newtab = optarg[0]; 3473 if (! newtab) 3474 error (SORT_FAILURE, 0, _("empty tab")); 3475 if (optarg[1]) 3476 { 3477 if (STREQ (optarg, "\\0")) 3478 newtab = '\0'; 3479 else 3480 { 3481 /* Provoke with `sort -txx'. Complain about 3482 "multi-character tab" instead of "multibyte tab", so 3483 that the diagnostic's wording does not need to be 3484 changed once multibyte characters are supported. */ 3485 error (SORT_FAILURE, 0, _("multi-character tab %s"), 3486 quote (optarg)); 3487 } 3488 } 3489 if (tab != TAB_DEFAULT && tab != newtab) 3490 error (SORT_FAILURE, 0, _("incompatible tabs")); 3491 tab = newtab; 3492 } 3493 break; 3494 3495 case 'T': 3496 add_temp_dir (optarg); 3497 break; 3498 3499 case 'u': 3500 unique = true; 3501 break; 3502 3503 case 'y': 3504 /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x 3505 through Solaris 7. It is also accepted by many non-Solaris 3506 "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5. 3507 -y is marked as obsolete starting with Solaris 8 (1999), but is 3508 still accepted as of Solaris 10 prerelease (2004). 3509 3510 Solaris 2.5.1 "sort -y 100" reads the input file "100", but 3511 emulate Solaris 8 and 9 "sort -y 100" which ignores the "100", 3512 and which in general ignores the argument after "-y" if it 3513 consists entirely of digits (it can even be empty). */ 3514 if (optarg == argv[optind - 1]) 3515 { 3516 char const *p; 3517 for (p = optarg; ISDIGIT (*p); p++) 3518 continue; 3519 optind -= (*p != '\0'); 3520 } 3521 break; 3522 3523 case 'z': 3524 eolchar = 0; 3525 break; 3526 3527 case_GETOPT_HELP_CHAR; 3528 3529 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); 3530 3531 default: 3532 usage (SORT_FAILURE); 3533 } 3534 } 3535 3536 if (files_from) 3537 { 3538 FILE *stream; 3539 3540 /* When using --files0-from=F, you may not specify any files 3541 on the command-line. */ 3542 if (nfiles) 3543 { 3544 error (0, 0, _("extra operand %s"), quote (files[0])); 3545 fprintf (stderr, "%s\n", 3546 _("file operands cannot be combined with --files0-from")); 3547 usage (SORT_FAILURE); 3548 } 3549 3550 if (STREQ (files_from, "-")) 3551 stream = stdin; 3552 else 3553 { 3554 stream = fopen (files_from, "r"); 3555 if (stream == NULL) 3556 error (SORT_FAILURE, errno, _("cannot open %s for reading"), 3557 quote (files_from)); 3558 } 3559 3560 readtokens0_init (&tok); 3561 3562 if (! readtokens0 (stream, &tok) || fclose (stream) != 0) 3563 error (SORT_FAILURE, 0, _("cannot read file names from %s"), 3564 quote (files_from)); 3565 3566 if (tok.n_tok) 3567 { 3568 size_t i; 3569 free (files); 3570 files = tok.tok; 3571 nfiles = tok.n_tok; 3572 for (i = 0; i < nfiles; i++) 3573 { 3574 if (STREQ (files[i], "-")) 3575 error (SORT_FAILURE, 0, _("when reading file names from stdin, " 3576 "no file name of %s allowed"), 3577 quote (files[i])); 3578 else if (files[i][0] == '\0') 3579 { 3580 /* Using the standard `filename:line-number:' prefix here is 3581 not totally appropriate, since NUL is the separator, not NL, 3582 but it might be better than nothing. */ 3583 unsigned long int file_number = i + 1; 3584 error (SORT_FAILURE, 0, 3585 _("%s:%lu: invalid zero-length file name"), 3586 quotearg_colon (files_from), file_number); 3587 } 3588 } 3589 } 3590 else 3591 error (SORT_FAILURE, 0, _("no input from %s"), 3592 quote (files_from)); 3593 } 3594 3595 /* Inheritance of global options to individual keys. */ 3596 for (key = keylist; key; key = key->next) 3597 { 3598 if (! (key->ignore 3599 || key->translate 3600 || (key->skipsblanks 3601 || key->reverse 3602 || key->skipeblanks 3603 || key->month 3604 || key->numeric 3605 || key->version 3606 || key->general_numeric 3607 || key->human_numeric 3608 || key->random))) 3609 { 3610 key->ignore = gkey.ignore; 3611 key->translate = gkey.translate; 3612 key->skipsblanks = gkey.skipsblanks; 3613 key->skipeblanks = gkey.skipeblanks; 3614 key->month = gkey.month; 3615 key->numeric = gkey.numeric; 3616 key->general_numeric = gkey.general_numeric; 3617 key->human_numeric = gkey.human_numeric; 3618 key->random = gkey.random; 3619 key->reverse = gkey.reverse; 3620 key->version = gkey.version; 3621 } 3622 3623 need_random |= key->random; 3624 } 3625 3626 if (!keylist && (gkey.ignore 3627 || gkey.translate 3628 || (gkey.skipsblanks 3629 || gkey.skipeblanks 3630 || gkey.month 3631 || gkey.numeric 3632 || gkey.general_numeric 3633 || gkey.human_numeric 3634 || gkey.random 3635 || gkey.version))) 3636 { 3637 insertkey (&gkey); 3638 need_random |= gkey.random; 3639 } 3640 3641 check_ordering_compatibility (); 3642 3643 reverse = gkey.reverse; 3644 3645 if (need_random) 3646 { 3647 randread_source = randread_new (random_source, MD5_DIGEST_SIZE); 3648 if (! randread_source) 3649 die (_("open failed"), random_source); 3650 } 3651 3652 if (temp_dir_count == 0) 3653 { 3654 char const *tmp_dir = getenv ("TMPDIR"); 3655 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR); 3656 } 3657 3658 if (nfiles == 0) 3659 { 3660 static char *minus = (char *) "-"; 3661 nfiles = 1; 3662 free (files); 3663 files = − 3664 } 3665 3666 /* Need to re-check that we meet the minimum requirement for memory 3667 usage with the final value for NMERGE. */ 3668 if (0 < sort_size) 3669 sort_size = MAX (sort_size, MIN_SORT_SIZE); 3670 3671 if (checkonly) 3672 { 3673 if (nfiles > 1) 3674 error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"), 3675 quote (files[1]), checkonly); 3676 3677 if (outfile) 3678 { 3679 static char opts[] = {0, 'o', 0}; 3680 opts[0] = checkonly; 3681 incompatible_options (opts); 3682 } 3683 3684 /* POSIX requires that sort return 1 IFF invoked with -c or -C and the 3685 input is not properly sorted. */ 3686 exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); 3687 } 3688 3689 if (mergeonly) 3690 { 3691 struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles); 3692 size_t i; 3693 3694 for (i = 0; i < nfiles; ++i) 3695 sortfiles[i].name = files[i]; 3696 3697 merge (sortfiles, 0, nfiles, outfile); 3698 IF_LINT (free (sortfiles)); 3699 } 3700 else 3701 sort (files, nfiles, outfile); 3702 3703 if (have_read_stdin && fclose (stdin) == EOF) 3704 die (_("close failed"), "-"); 3705 3706 exit (EXIT_SUCCESS); 3707} 3708