1/* sort - sort lines of text (with all kinds of options).
2   Copyright (C) 1988, 1991-2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17   Written December 1988 by Mike Haertel.
18   The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
19   or (US mail) as Mike Haertel c/o Free Software Foundation.
20
21   Ørn E. Hansen added NLS support in 1997.  */
22
23#include <config.h>
24
25#include <getopt.h>
26#include <sys/types.h>
27#include <sys/wait.h>
28#include <signal.h>
29#include "system.h"
30#include "argmatch.h"
31#include "error.h"
32#include "filevercmp.h"
33#include "hard-locale.h"
34#include "hash.h"
35#include "md5.h"
36#include "physmem.h"
37#include "posixver.h"
38#include "quote.h"
39#include "quotearg.h"
40#include "randread.h"
41#include "readtokens0.h"
42#include "stdio--.h"
43#include "stdlib--.h"
44#include "strnumcmp.h"
45#include "xmemcoll.h"
46#include "xmemxfrm.h"
47#include "xnanosleep.h"
48#include "xstrtol.h"
49
50#if HAVE_SYS_RESOURCE_H
51# include <sys/resource.h>
52#endif
53#ifndef RLIMIT_DATA
54struct rlimit { size_t rlim_cur; };
55# define getrlimit(Resource, Rlp) (-1)
56#endif
57
58/* The official name of this program (e.g., no `g' prefix).  */
59#define PROGRAM_NAME "sort"
60
61#define AUTHORS \
62  proper_name ("Mike Haertel"), \
63  proper_name ("Paul Eggert")
64
65#if HAVE_LANGINFO_CODESET
66# include <langinfo.h>
67#endif
68
69/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
70   present.  */
71#ifndef SA_NOCLDSTOP
72# define SA_NOCLDSTOP 0
73/* No sigprocmask.  Always 'return' zero. */
74# define sigprocmask(How, Set, Oset) (0)
75# define sigset_t int
76# if ! HAVE_SIGINTERRUPT
77#  define siginterrupt(sig, flag) /* empty */
78# endif
79#endif
80
81#if !defined OPEN_MAX && defined NR_OPEN
82# define OPEN_MAX NR_OPEN
83#endif
84#if !defined OPEN_MAX
85# define OPEN_MAX 20
86#endif
87
88#define UCHAR_LIM (UCHAR_MAX + 1)
89
90#ifndef DEFAULT_TMPDIR
91# define DEFAULT_TMPDIR "/tmp"
92#endif
93
94/* Exit statuses.  */
95enum
96  {
97    /* POSIX says to exit with status 1 if invoked with -c and the
98       input is not properly sorted.  */
99    SORT_OUT_OF_ORDER = 1,
100
101    /* POSIX says any other irregular exit must exit with a status
102       code greater than 1.  */
103    SORT_FAILURE = 2
104  };
105
106enum
107  {
108    /* The number of times we should try to fork a compression process
109       (we retry if the fork call fails).  We don't _need_ to compress
110       temp files, this is just to reduce disk access, so this number
111       can be small.  Each retry doubles in duration.  */
112    MAX_FORK_TRIES_COMPRESS = 4,
113
114    /* The number of times we should try to fork a decompression process.
115       If we can't fork a decompression process, we can't sort, so this
116       number should be big.  Each retry doubles in duration.  */
117    MAX_FORK_TRIES_DECOMPRESS = 9
118  };
119
120/* The representation of the decimal point in the current locale.  */
121static int decimal_point;
122
123/* Thousands separator; if -1, then there isn't one.  */
124static int thousands_sep;
125
126/* Nonzero if the corresponding locales are hard.  */
127static bool hard_LC_COLLATE;
128#if HAVE_NL_LANGINFO
129static bool hard_LC_TIME;
130#endif
131
132#define NONZERO(x) ((x) != 0)
133
134/* The kind of blanks for '-b' to skip in various options. */
135enum blanktype { bl_start, bl_end, bl_both };
136
137/* The character marking end of line. Default to \n. */
138static char eolchar = '\n';
139
140/* Lines are held in core as counted strings. */
141struct line
142{
143  char *text;			/* Text of the line. */
144  size_t length;		/* Length including final newline. */
145  char *keybeg;			/* Start of first key. */
146  char *keylim;			/* Limit of first key. */
147};
148
149/* Input buffers. */
150struct buffer
151{
152  char *buf;			/* Dynamically allocated buffer,
153                                   partitioned into 3 regions:
154                                   - input data;
155                                   - unused area;
156                                   - an array of lines, in reverse order.  */
157  size_t used;			/* Number of bytes used for input data.  */
158  size_t nlines;		/* Number of lines in the line array.  */
159  size_t alloc;			/* Number of bytes allocated. */
160  size_t left;			/* Number of bytes left from previous reads. */
161  size_t line_bytes;		/* Number of bytes to reserve for each line. */
162  bool eof;			/* An EOF has been read.  */
163};
164
165struct keyfield
166{
167  size_t sword;			/* Zero-origin 'word' to start at. */
168  size_t schar;			/* Additional characters to skip. */
169  size_t eword;			/* Zero-origin first word after field. */
170  size_t echar;			/* Additional characters in field. */
171  bool const *ignore;		/* Boolean array of characters to ignore. */
172  char const *translate;	/* Translation applied to characters. */
173  bool skipsblanks;		/* Skip leading blanks when finding start.  */
174  bool skipeblanks;		/* Skip leading blanks when finding end.  */
175  bool numeric;			/* Flag for numeric comparison.  Handle
176                                   strings of digits with optional decimal
177                                   point, but no exponential notation. */
178  bool random;			/* Sort by random hash of key.  */
179  bool general_numeric;		/* Flag for general, numeric comparison.
180                                   Handle numbers in exponential notation. */
181  bool human_numeric;		/* Flag for sorting by human readable
182                                   units with either SI xor IEC prefixes. */
183  int si_present;		/* Flag for checking for mixed SI and IEC. */
184  bool month;			/* Flag for comparison by month name. */
185  bool reverse;			/* Reverse the sense of comparison. */
186  bool version;			/* sort by version number */
187  struct keyfield *next;	/* Next keyfield to try. */
188};
189
190struct month
191{
192  char const *name;
193  int val;
194};
195
196/* FIXME: None of these tables work with multibyte character sets.
197   Also, there are many other bugs when handling multibyte characters.
198   One way to fix this is to rewrite `sort' to use wide characters
199   internally, but doing this with good performance is a bit
200   tricky.  */
201
202/* Table of blanks.  */
203static bool blanks[UCHAR_LIM];
204
205/* Table of non-printing characters. */
206static bool nonprinting[UCHAR_LIM];
207
208/* Table of non-dictionary characters (not letters, digits, or blanks). */
209static bool nondictionary[UCHAR_LIM];
210
211/* Translation table folding lower case to upper.  */
212static char fold_toupper[UCHAR_LIM];
213
214#define MONTHS_PER_YEAR 12
215
216/* Table mapping month names to integers.
217   Alphabetic order allows binary search. */
218static struct month monthtab[] =
219{
220  {"APR", 4},
221  {"AUG", 8},
222  {"DEC", 12},
223  {"FEB", 2},
224  {"JAN", 1},
225  {"JUL", 7},
226  {"JUN", 6},
227  {"MAR", 3},
228  {"MAY", 5},
229  {"NOV", 11},
230  {"OCT", 10},
231  {"SEP", 9}
232};
233
234/* During the merge phase, the number of files to merge at once. */
235#define NMERGE_DEFAULT 16
236
237/* Minimum size for a merge or check buffer.  */
238#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
239
240/* Minimum sort size; the code might not work with smaller sizes.  */
241#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
242
243/* The number of bytes needed for a merge or check buffer, which can
244   function relatively efficiently even if it holds only one line.  If
245   a longer line is seen, this value is increased.  */
246static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
247
248/* The approximate maximum number of bytes of main memory to use, as
249   specified by the user.  Zero if the user has not specified a size.  */
250static size_t sort_size;
251
252/* The guessed size for non-regular files.  */
253#define INPUT_FILE_SIZE_GUESS (1024 * 1024)
254
255/* Array of directory names in which any temporary files are to be created. */
256static char const **temp_dirs;
257
258/* Number of temporary directory names used.  */
259static size_t temp_dir_count;
260
261/* Number of allocated slots in temp_dirs.  */
262static size_t temp_dir_alloc;
263
264/* Flag to reverse the order of all comparisons. */
265static bool reverse;
266
267/* Flag for stable sort.  This turns off the last ditch bytewise
268   comparison of lines, and instead leaves lines in the same order
269   they were read if all keys compare equal.  */
270static bool stable;
271
272/* If TAB has this value, blanks separate fields.  */
273enum { TAB_DEFAULT = CHAR_MAX + 1 };
274
275/* Tab character separating fields.  If TAB_DEFAULT, then fields are
276   separated by the empty string between a non-blank character and a blank
277   character. */
278static int tab = TAB_DEFAULT;
279
280/* Flag to remove consecutive duplicate lines from the output.
281   Only the last of a sequence of equal lines will be output. */
282static bool unique;
283
284/* Nonzero if any of the input files are the standard input. */
285static bool have_read_stdin;
286
287/* List of key field comparisons to be tried.  */
288static struct keyfield *keylist;
289
290/* Program used to (de)compress temp files.  Must accept -d.  */
291static char const *compress_program;
292
293/* Maximum number of files to merge in one go.  If more than this
294   number are present, temp files will be used. */
295static unsigned int nmerge = NMERGE_DEFAULT;
296
297static void sortlines_temp (struct line *, size_t, struct line *);
298
299/* Report MESSAGE for FILE, then clean up and exit.
300   If FILE is null, it represents standard output.  */
301
302static void die (char const *, char const *) ATTRIBUTE_NORETURN;
303static void
304die (char const *message, char const *file)
305{
306  error (0, errno, "%s: %s", message, file ? file : _("standard output"));
307  exit (SORT_FAILURE);
308}
309
310void
311usage (int status)
312{
313  if (status != EXIT_SUCCESS)
314    fprintf (stderr, _("Try `%s --help' for more information.\n"),
315             program_name);
316  else
317    {
318      printf (_("\
319Usage: %s [OPTION]... [FILE]...\n\
320  or:  %s [OPTION]... --files0-from=F\n\
321"),
322              program_name, program_name);
323      fputs (_("\
324Write sorted concatenation of all FILE(s) to standard output.\n\
325\n\
326"), stdout);
327      fputs (_("\
328Mandatory arguments to long options are mandatory for short options too.\n\
329"), stdout);
330      fputs (_("\
331Ordering options:\n\
332\n\
333"), stdout);
334      fputs (_("\
335  -b, --ignore-leading-blanks  ignore leading blanks\n\
336  -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
337  -f, --ignore-case           fold lower case to upper case characters\n\
338"), stdout);
339      fputs (_("\
340  -g, --general-numeric-sort  compare according to general numerical value\n\
341  -i, --ignore-nonprinting    consider only printable characters\n\
342  -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
343"), stdout);
344      fputs (_("\
345  -h, --human-numeric-sort    compare human readable numbers (e.g., 2K 1G)\n\
346"), stdout);
347      fputs (_("\
348  -n, --numeric-sort          compare according to string numerical value\n\
349  -R, --random-sort           sort by random hash of keys\n\
350      --random-source=FILE    get random bytes from FILE\n\
351  -r, --reverse               reverse the result of comparisons\n\
352"), stdout);
353      fputs (_("\
354      --sort=WORD             sort according to WORD:\n\
355                                general-numeric -g, human-numeric -h, month -M,\n\
356                                numeric -n, random -R, version -V\n\
357  -V, --version-sort          natural sort of (version) numbers within text\n\
358\n\
359"), stdout);
360      fputs (_("\
361Other options:\n\
362\n\
363"), stdout);
364      fputs (_("\
365      --batch-size=NMERGE   merge at most NMERGE inputs at once;\n\
366                            for more use temp files\n\
367"), stdout);
368      fputs (_("\
369  -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
370  -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
371      --compress-program=PROG  compress temporaries with PROG;\n\
372                              decompress them with PROG -d\n\
373      --files0-from=F       read input from the files specified by\n\
374                            NUL-terminated names in file F;\n\
375                            If F is - then read names from standard input\n\
376"), stdout);
377      fputs (_("\
378  -k, --key=POS1[,POS2]     start a key at POS1 (origin 1), end it at POS2\n\
379                            (default end of line)\n\
380  -m, --merge               merge already sorted files; do not sort\n\
381"), stdout);
382      fputs (_("\
383  -o, --output=FILE         write result to FILE instead of standard output\n\
384  -s, --stable              stabilize sort by disabling last-resort comparison\n\
385  -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
386"), stdout);
387      printf (_("\
388  -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
389  -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
390                              multiple options specify multiple directories\n\
391  -u, --unique              with -c, check for strict ordering;\n\
392                              without -c, output only the first of an equal run\n\
393"), DEFAULT_TMPDIR);
394      fputs (_("\
395  -z, --zero-terminated     end lines with 0 byte, not newline\n\
396"), stdout);
397      fputs (HELP_OPTION_DESCRIPTION, stdout);
398      fputs (VERSION_OPTION_DESCRIPTION, stdout);
399      fputs (_("\
400\n\
401POS is F[.C][OPTS], where F is the field number and C the character position\n\
402in the field; both are origin 1.  If neither -t nor -b is in effect, characters\n\
403in a field are counted from the beginning of the preceding whitespace.  OPTS is\n\
404one or more single-letter ordering options, which override global ordering\n\
405options for that key.  If no key is given, use the entire line as the key.\n\
406\n\
407SIZE may be followed by the following multiplicative suffixes:\n\
408"), stdout);
409      fputs (_("\
410% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
411\n\
412With no FILE, or when FILE is -, read standard input.\n\
413\n\
414*** WARNING ***\n\
415The locale specified by the environment affects sort order.\n\
416Set LC_ALL=C to get the traditional sort order that uses\n\
417native byte values.\n\
418"), stdout );
419      emit_ancillary_info ();
420    }
421
422  exit (status);
423}
424
425/* For long options that have no equivalent short option, use a
426   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
427enum
428{
429  CHECK_OPTION = CHAR_MAX + 1,
430  COMPRESS_PROGRAM_OPTION,
431  FILES0_FROM_OPTION,
432  NMERGE_OPTION,
433  RANDOM_SOURCE_OPTION,
434  SORT_OPTION
435};
436
437static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
438
439static struct option const long_options[] =
440{
441  {"ignore-leading-blanks", no_argument, NULL, 'b'},
442  {"check", optional_argument, NULL, CHECK_OPTION},
443  {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
444  {"dictionary-order", no_argument, NULL, 'd'},
445  {"ignore-case", no_argument, NULL, 'f'},
446  {"files0-from", required_argument, NULL, FILES0_FROM_OPTION},
447  {"general-numeric-sort", no_argument, NULL, 'g'},
448  {"ignore-nonprinting", no_argument, NULL, 'i'},
449  {"key", required_argument, NULL, 'k'},
450  {"merge", no_argument, NULL, 'm'},
451  {"month-sort", no_argument, NULL, 'M'},
452  {"numeric-sort", no_argument, NULL, 'n'},
453  {"human-numeric-sort", no_argument, NULL, 'h'},
454  {"version-sort", no_argument, NULL, 'V'},
455  {"random-sort", no_argument, NULL, 'R'},
456  {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
457  {"sort", required_argument, NULL, SORT_OPTION},
458  {"output", required_argument, NULL, 'o'},
459  {"reverse", no_argument, NULL, 'r'},
460  {"stable", no_argument, NULL, 's'},
461  {"batch-size", required_argument, NULL, NMERGE_OPTION},
462  {"buffer-size", required_argument, NULL, 'S'},
463  {"field-separator", required_argument, NULL, 't'},
464  {"temporary-directory", required_argument, NULL, 'T'},
465  {"unique", no_argument, NULL, 'u'},
466  {"zero-terminated", no_argument, NULL, 'z'},
467  {GETOPT_HELP_OPTION_DECL},
468  {GETOPT_VERSION_OPTION_DECL},
469  {NULL, 0, NULL, 0},
470};
471
472#define CHECK_TABLE \
473  _ct_("quiet",          'C') \
474  _ct_("silent",         'C') \
475  _ct_("diagnose-first", 'c')
476
477static char const *const check_args[] =
478{
479#define _ct_(_s, _c) _s,
480  CHECK_TABLE NULL
481#undef  _ct_
482};
483static char const check_types[] =
484{
485#define _ct_(_s, _c) _c,
486  CHECK_TABLE
487#undef  _ct_
488};
489
490#define SORT_TABLE \
491  _st_("general-numeric", 'g') \
492  _st_("human-numeric",   'h') \
493  _st_("month",           'M') \
494  _st_("numeric",         'n') \
495  _st_("random",          'R') \
496  _st_("version",         'V')
497
498static char const *const sort_args[] =
499{
500#define _st_(_s, _c) _s,
501  SORT_TABLE NULL
502#undef  _st_
503};
504static char const sort_types[] =
505{
506#define _st_(_s, _c) _c,
507  SORT_TABLE
508#undef  _st_
509};
510
511/* The set of signals that are caught.  */
512static sigset_t caught_signals;
513
514/* Critical section status.  */
515struct cs_status
516{
517  bool valid;
518  sigset_t sigs;
519};
520
521/* Enter a critical section.  */
522static struct cs_status
523cs_enter (void)
524{
525  struct cs_status status;
526  status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
527  return status;
528}
529
530/* Leave a critical section.  */
531static void
532cs_leave (struct cs_status status)
533{
534  if (status.valid)
535    {
536      /* Ignore failure when restoring the signal mask. */
537      sigprocmask (SIG_SETMASK, &status.sigs, NULL);
538    }
539}
540
541/* The list of temporary files. */
542struct tempnode
543{
544  struct tempnode *volatile next;
545  pid_t pid;     /* If compressed, the pid of compressor, else zero */
546  char name[1];  /* Actual size is 1 + file name length.  */
547};
548static struct tempnode *volatile temphead;
549static struct tempnode *volatile *temptail = &temphead;
550
551struct sortfile
552{
553  char const *name;
554  pid_t pid;     /* If compressed, the pid of compressor, else zero */
555};
556
557/* A table where we store compression process states.  We clean up all
558   processes in a timely manner so as not to exhaust system resources,
559   so we store the info on whether the process is still running, or has
560   been reaped here.  */
561static Hash_table *proctab;
562
563enum { INIT_PROCTAB_SIZE = 47 };
564
565enum procstate { ALIVE, ZOMBIE };
566
567/* A proctab entry.  The COUNT field is there in case we fork a new
568   compression process that has the same PID as an old zombie process
569   that is still in the table (because the process to decompress the
570   temp file it was associated with hasn't started yet).  */
571struct procnode
572{
573  pid_t pid;
574  enum procstate state;
575  size_t count;
576};
577
578static size_t
579proctab_hasher (const void *entry, size_t tabsize)
580{
581  const struct procnode *node = entry;
582  return node->pid % tabsize;
583}
584
585static bool
586proctab_comparator (const void *e1, const void *e2)
587{
588  const struct procnode *n1 = e1, *n2 = e2;
589  return n1->pid == n2->pid;
590}
591
592/* The total number of forked processes (compressors and decompressors)
593   that have not been reaped yet. */
594static size_t nprocs;
595
596/* The number of child processes we'll allow before we try to reap some. */
597enum { MAX_PROCS_BEFORE_REAP = 2 };
598
599/* If 0 < PID, wait for the child process with that PID to exit.
600   If PID is -1, clean up a random child process which has finished and
601   return the process ID of that child.  If PID is -1 and no processes
602   have quit yet, return 0 without waiting.  */
603
604static pid_t
605reap (pid_t pid)
606{
607  int status;
608  pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
609
610  if (cpid < 0)
611    error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
612           compress_program);
613  else if (0 < cpid)
614    {
615      if (! WIFEXITED (status) || WEXITSTATUS (status))
616        error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
617               compress_program);
618      --nprocs;
619    }
620
621  return cpid;
622}
623
624/* Add the PID of a running compression process to proctab, or update
625   the entry COUNT and STATE fields if it's already there.  This also
626   creates the table for us the first time it's called.  */
627
628static void
629register_proc (pid_t pid)
630{
631  struct procnode test, *node;
632
633  if (! proctab)
634    {
635      proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
636                                 proctab_hasher,
637                                 proctab_comparator,
638                                 free);
639      if (! proctab)
640        xalloc_die ();
641    }
642
643  test.pid = pid;
644  node = hash_lookup (proctab, &test);
645  if (node)
646    {
647      node->state = ALIVE;
648      ++node->count;
649    }
650  else
651    {
652      node = xmalloc (sizeof *node);
653      node->pid = pid;
654      node->state = ALIVE;
655      node->count = 1;
656      if (hash_insert (proctab, node) == NULL)
657        xalloc_die ();
658    }
659}
660
661/* This is called when we reap a random process.  We don't know
662   whether we have reaped a compression process or a decompression
663   process until we look in the table.  If there's an ALIVE entry for
664   it, then we have reaped a compression process, so change the state
665   to ZOMBIE.  Otherwise, it's a decompression processes, so ignore it.  */
666
667static void
668update_proc (pid_t pid)
669{
670  struct procnode test, *node;
671
672  test.pid = pid;
673  node = hash_lookup (proctab, &test);
674  if (node)
675    node->state = ZOMBIE;
676}
677
678/* This is for when we need to wait for a compression process to exit.
679   If it has a ZOMBIE entry in the table then it's already dead and has
680   been reaped.  Note that if there's an ALIVE entry for it, it still may
681   already have died and been reaped if a second process was created with
682   the same PID.  This is probably exceedingly rare, but to be on the safe
683   side we will have to wait for any compression process with this PID.  */
684
685static void
686wait_proc (pid_t pid)
687{
688  struct procnode test, *node;
689
690  test.pid = pid;
691  node = hash_lookup (proctab, &test);
692  if (node->state == ALIVE)
693    reap (pid);
694
695  node->state = ZOMBIE;
696  if (! --node->count)
697    {
698      hash_delete (proctab, node);
699      free (node);
700    }
701}
702
703/* Keep reaping finished children as long as there are more to reap.
704   This doesn't block waiting for any of them, it only reaps those
705   that are already dead.  */
706
707static void
708reap_some (void)
709{
710  pid_t pid;
711
712  while (0 < nprocs && (pid = reap (-1)))
713    update_proc (pid);
714}
715
716/* Clean up any remaining temporary files.  */
717
718static void
719cleanup (void)
720{
721  struct tempnode const *node;
722
723  for (node = temphead; node; node = node->next)
724    unlink (node->name);
725  temphead = NULL;
726}
727
728/* Cleanup actions to take when exiting.  */
729
730static void
731exit_cleanup (void)
732{
733  if (temphead)
734    {
735      /* Clean up any remaining temporary files in a critical section so
736         that a signal handler does not try to clean them too.  */
737      struct cs_status cs = cs_enter ();
738      cleanup ();
739      cs_leave (cs);
740    }
741
742  close_stdout ();
743}
744
745/* Create a new temporary file, returning its newly allocated tempnode.
746   Store into *PFD the file descriptor open for writing.
747   If the creation fails, return NULL and store -1 into *PFD if the
748   failure is due to file descriptor exhaustion and
749   SURVIVE_FD_EXHAUSTION; otherwise, die.  */
750
751static struct tempnode *
752create_temp_file (int *pfd, bool survive_fd_exhaustion)
753{
754  static char const slashbase[] = "/sortXXXXXX";
755  static size_t temp_dir_index;
756  int fd;
757  int saved_errno;
758  char const *temp_dir = temp_dirs[temp_dir_index];
759  size_t len = strlen (temp_dir);
760  struct tempnode *node =
761    xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
762  char *file = node->name;
763  struct cs_status cs;
764
765  memcpy (file, temp_dir, len);
766  memcpy (file + len, slashbase, sizeof slashbase);
767  node->next = NULL;
768  node->pid = 0;
769  if (++temp_dir_index == temp_dir_count)
770    temp_dir_index = 0;
771
772  /* Create the temporary file in a critical section, to avoid races.  */
773  cs = cs_enter ();
774  fd = mkstemp (file);
775  if (0 <= fd)
776    {
777      *temptail = node;
778      temptail = &node->next;
779    }
780  saved_errno = errno;
781  cs_leave (cs);
782  errno = saved_errno;
783
784  if (fd < 0)
785    {
786      if (! (survive_fd_exhaustion && errno == EMFILE))
787        error (SORT_FAILURE, errno, _("cannot create temporary file in %s"),
788               quote (temp_dir));
789      free (node);
790      node = NULL;
791    }
792
793  *pfd = fd;
794  return node;
795}
796
797/* Return a stream for FILE, opened with mode HOW.  A null FILE means
798   standard output; HOW should be "w".  When opening for input, "-"
799   means standard input.  To avoid confusion, do not return file
800   descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
801   opening an ordinary FILE.  Return NULL if unsuccessful.  */
802
803static FILE *
804stream_open (const char *file, const char *how)
805{
806  if (!file)
807    return stdout;
808  if (STREQ (file, "-") && *how == 'r')
809    {
810      have_read_stdin = true;
811      return stdin;
812    }
813  return fopen (file, how);
814}
815
816/* Same as stream_open, except always return a non-null value; die on
817   failure.  */
818
819static FILE *
820xfopen (const char *file, const char *how)
821 {
822  FILE *fp = stream_open (file, how);
823  if (!fp)
824    die (_("open failed"), file);
825  return fp;
826}
827
828/* Close FP, whose name is FILE, and report any errors.  */
829
830static void
831xfclose (FILE *fp, char const *file)
832{
833  switch (fileno (fp))
834    {
835    case STDIN_FILENO:
836      /* Allow reading stdin from tty more than once.  */
837      if (feof (fp))
838        clearerr (fp);
839      break;
840
841    case STDOUT_FILENO:
842      /* Don't close stdout just yet.  close_stdout does that.  */
843      if (fflush (fp) != 0)
844        die (_("fflush failed"), file);
845      break;
846
847    default:
848      if (fclose (fp) != 0)
849        die (_("close failed"), file);
850      break;
851    }
852}
853
854static void
855dup2_or_die (int oldfd, int newfd)
856{
857  if (dup2 (oldfd, newfd) < 0)
858    error (SORT_FAILURE, errno, _("dup2 failed"));
859}
860
861/* Fork a child process for piping to and do common cleanup.  The
862   TRIES parameter tells us how many times to try to fork before
863   giving up.  Return the PID of the child, or -1 (setting errno)
864   on failure. */
865
866static pid_t
867pipe_fork (int pipefds[2], size_t tries)
868{
869#if HAVE_WORKING_FORK
870  struct tempnode *saved_temphead;
871  int saved_errno;
872  double wait_retry = 0.25;
873  pid_t pid IF_LINT (= -1);
874  struct cs_status cs;
875
876  if (pipe (pipefds) < 0)
877    return -1;
878
879  while (tries--)
880    {
881      /* This is so the child process won't delete our temp files
882         if it receives a signal before exec-ing.  */
883      cs = cs_enter ();
884      saved_temphead = temphead;
885      temphead = NULL;
886
887      pid = fork ();
888      saved_errno = errno;
889      if (pid)
890        temphead = saved_temphead;
891
892      cs_leave (cs);
893      errno = saved_errno;
894
895      if (0 <= pid || errno != EAGAIN)
896        break;
897      else
898        {
899          xnanosleep (wait_retry);
900          wait_retry *= 2;
901          reap_some ();
902        }
903    }
904
905  if (pid < 0)
906    {
907      saved_errno = errno;
908      close (pipefds[0]);
909      close (pipefds[1]);
910      errno = saved_errno;
911    }
912  else if (pid == 0)
913    {
914      close (STDIN_FILENO);
915      close (STDOUT_FILENO);
916    }
917  else
918    ++nprocs;
919
920  return pid;
921
922#else  /* ! HAVE_WORKING_FORK */
923  return -1;
924#endif
925}
926
927/* Create a temporary file and start a compression program to filter output
928   to that file.  Set *PFP to the file handle and if PPID is non-NULL,
929   set *PPID to the PID of the newly-created process.  If the creation
930   fails, return NULL if the failure is due to file descriptor
931   exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die.  */
932
933static char *
934maybe_create_temp (FILE **pfp, pid_t *ppid, bool survive_fd_exhaustion)
935{
936  int tempfd;
937  struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion);
938  char *name;
939
940  if (! node)
941    return NULL;
942
943  name = node->name;
944
945  if (compress_program)
946    {
947      int pipefds[2];
948
949      node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
950      if (0 < node->pid)
951        {
952          close (tempfd);
953          close (pipefds[0]);
954          tempfd = pipefds[1];
955
956          register_proc (node->pid);
957        }
958      else if (node->pid == 0)
959        {
960          close (pipefds[1]);
961          dup2_or_die (tempfd, STDOUT_FILENO);
962          close (tempfd);
963          dup2_or_die (pipefds[0], STDIN_FILENO);
964          close (pipefds[0]);
965
966          if (execlp (compress_program, compress_program, (char *) NULL) < 0)
967            error (SORT_FAILURE, errno, _("couldn't execute %s"),
968                   compress_program);
969        }
970      else
971        node->pid = 0;
972    }
973
974  *pfp = fdopen (tempfd, "w");
975  if (! *pfp)
976    die (_("couldn't create temporary file"), name);
977
978  if (ppid)
979    *ppid = node->pid;
980
981  return name;
982}
983
984/* Create a temporary file and start a compression program to filter output
985   to that file.  Set *PFP to the file handle and if *PPID is non-NULL,
986   set it to the PID of the newly-created process.  Die on failure.  */
987
988static char *
989create_temp (FILE **pfp, pid_t *ppid)
990{
991  return maybe_create_temp (pfp, ppid, false);
992}
993
994/* Open a compressed temp file and start a decompression process through
995   which to filter the input.  PID must be the valid processes ID of the
996   process used to compress the file.  Return NULL (setting errno to
997   EMFILE) if we ran out of file descriptors, and die on any other
998   kind of failure.  */
999
1000static FILE *
1001open_temp (const char *name, pid_t pid)
1002{
1003  int tempfd, pipefds[2];
1004  FILE *fp = NULL;
1005
1006  wait_proc (pid);
1007
1008  tempfd = open (name, O_RDONLY);
1009  if (tempfd < 0)
1010    return NULL;
1011
1012  switch (pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS))
1013    {
1014    case -1:
1015      if (errno != EMFILE)
1016        error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
1017               compress_program);
1018      close (tempfd);
1019      errno = EMFILE;
1020      break;
1021
1022    case 0:
1023      close (pipefds[0]);
1024      dup2_or_die (tempfd, STDIN_FILENO);
1025      close (tempfd);
1026      dup2_or_die (pipefds[1], STDOUT_FILENO);
1027      close (pipefds[1]);
1028
1029      execlp (compress_program, compress_program, "-d", (char *) NULL);
1030      error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
1031             compress_program);
1032
1033    default:
1034      close (tempfd);
1035      close (pipefds[1]);
1036
1037      fp = fdopen (pipefds[0], "r");
1038      if (! fp)
1039        {
1040          int saved_errno = errno;
1041          close (pipefds[0]);
1042          errno = saved_errno;
1043        }
1044      break;
1045    }
1046
1047  return fp;
1048}
1049
1050static void
1051write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
1052{
1053  if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
1054    die (_("write failed"), output_file);
1055}
1056
1057/* Append DIR to the array of temporary directory names.  */
1058static void
1059add_temp_dir (char const *dir)
1060{
1061  if (temp_dir_count == temp_dir_alloc)
1062    temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
1063
1064  temp_dirs[temp_dir_count++] = dir;
1065}
1066
1067/* Remove NAME from the list of temporary files.  */
1068
1069static void
1070zaptemp (const char *name)
1071{
1072  struct tempnode *volatile *pnode;
1073  struct tempnode *node;
1074  struct tempnode *next;
1075  int unlink_status;
1076  int unlink_errno = 0;
1077  struct cs_status cs;
1078
1079  for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
1080    continue;
1081
1082  /* Unlink the temporary file in a critical section to avoid races.  */
1083  next = node->next;
1084  cs = cs_enter ();
1085  unlink_status = unlink (name);
1086  unlink_errno = errno;
1087  *pnode = next;
1088  cs_leave (cs);
1089
1090  if (unlink_status != 0)
1091    error (0, unlink_errno, _("warning: cannot remove: %s"), name);
1092  if (! next)
1093    temptail = pnode;
1094  free (node);
1095}
1096
1097#if HAVE_NL_LANGINFO
1098
1099static int
1100struct_month_cmp (const void *m1, const void *m2)
1101{
1102  struct month const *month1 = m1;
1103  struct month const *month2 = m2;
1104  return strcmp (month1->name, month2->name);
1105}
1106
1107#endif
1108
1109/* Initialize the character class tables. */
1110
1111static void
1112inittables (void)
1113{
1114  size_t i;
1115
1116  for (i = 0; i < UCHAR_LIM; ++i)
1117    {
1118      blanks[i] = !! isblank (i);
1119      nonprinting[i] = ! isprint (i);
1120      nondictionary[i] = ! isalnum (i) && ! isblank (i);
1121      fold_toupper[i] = toupper (i);
1122    }
1123
1124#if HAVE_NL_LANGINFO
1125  /* If we're not in the "C" locale, read different names for months.  */
1126  if (hard_LC_TIME)
1127    {
1128      for (i = 0; i < MONTHS_PER_YEAR; i++)
1129        {
1130          char const *s;
1131          size_t s_len;
1132          size_t j;
1133          char *name;
1134
1135          s = (char *) nl_langinfo (ABMON_1 + i);
1136          s_len = strlen (s);
1137          monthtab[i].name = name = xmalloc (s_len + 1);
1138          monthtab[i].val = i + 1;
1139
1140          for (j = 0; j < s_len; j++)
1141            name[j] = fold_toupper[to_uchar (s[j])];
1142          name[j] = '\0';
1143        }
1144      qsort ((void *) monthtab, MONTHS_PER_YEAR,
1145             sizeof *monthtab, struct_month_cmp);
1146    }
1147#endif
1148}
1149
1150/* Specify how many inputs may be merged at once.
1151   This may be set on the command-line with the
1152   --batch-size option. */
1153static void
1154specify_nmerge (int oi, char c, char const *s)
1155{
1156  uintmax_t n;
1157  struct rlimit rlimit;
1158  enum strtol_error e = xstrtoumax (s, NULL, 10, &n, NULL);
1159
1160  /* Try to find out how many file descriptors we'll be able
1161     to open.  We need at least nmerge + 3 (STDIN_FILENO,
1162     STDOUT_FILENO and STDERR_FILENO). */
1163  unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0
1164                              ? rlimit.rlim_cur
1165                              : OPEN_MAX)
1166                             - 3);
1167
1168  if (e == LONGINT_OK)
1169    {
1170      nmerge = n;
1171      if (nmerge != n)
1172        e = LONGINT_OVERFLOW;
1173      else
1174        {
1175          if (nmerge < 2)
1176            {
1177              error (0, 0, _("invalid --%s argument %s"),
1178                     long_options[oi].name, quote(s));
1179              error (SORT_FAILURE, 0,
1180                     _("minimum --%s argument is %s"),
1181                     long_options[oi].name, quote("2"));
1182            }
1183          else if (max_nmerge < nmerge)
1184            {
1185              e = LONGINT_OVERFLOW;
1186            }
1187          else
1188            return;
1189        }
1190    }
1191
1192  if (e == LONGINT_OVERFLOW)
1193    {
1194      char max_nmerge_buf[INT_BUFSIZE_BOUND (unsigned int)];
1195      error (0, 0, _("--%s argument %s too large"),
1196             long_options[oi].name, quote(s));
1197      error (SORT_FAILURE, 0,
1198             _("maximum --%s argument with current rlimit is %s"),
1199             long_options[oi].name,
1200             uinttostr (max_nmerge, max_nmerge_buf));
1201    }
1202  else
1203    xstrtol_fatal (e, oi, c, long_options, s);
1204}
1205
1206/* Specify the amount of main memory to use when sorting.  */
1207static void
1208specify_sort_size (int oi, char c, char const *s)
1209{
1210  uintmax_t n;
1211  char *suffix;
1212  enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
1213
1214  /* The default unit is KiB.  */
1215  if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
1216    {
1217      if (n <= UINTMAX_MAX / 1024)
1218        n *= 1024;
1219      else
1220        e = LONGINT_OVERFLOW;
1221    }
1222
1223  /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
1224  if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
1225    switch (suffix[0])
1226      {
1227      case 'b':
1228        e = LONGINT_OK;
1229        break;
1230
1231      case '%':
1232        {
1233          double mem = physmem_total () * n / 100;
1234
1235          /* Use "<", not "<=", to avoid problems with rounding.  */
1236          if (mem < UINTMAX_MAX)
1237            {
1238              n = mem;
1239              e = LONGINT_OK;
1240            }
1241          else
1242            e = LONGINT_OVERFLOW;
1243        }
1244        break;
1245      }
1246
1247  if (e == LONGINT_OK)
1248    {
1249      /* If multiple sort sizes are specified, take the maximum, so
1250         that option order does not matter.  */
1251      if (n < sort_size)
1252        return;
1253
1254      sort_size = n;
1255      if (sort_size == n)
1256        {
1257          sort_size = MAX (sort_size, MIN_SORT_SIZE);
1258          return;
1259        }
1260
1261      e = LONGINT_OVERFLOW;
1262    }
1263
1264  xstrtol_fatal (e, oi, c, long_options, s);
1265}
1266
1267/* Return the default sort size.  */
1268static size_t
1269default_sort_size (void)
1270{
1271  /* Let MEM be available memory or 1/8 of total memory, whichever
1272     is greater.  */
1273  double avail = physmem_available ();
1274  double total = physmem_total ();
1275  double mem = MAX (avail, total / 8);
1276  struct rlimit rlimit;
1277
1278  /* Let SIZE be MEM, but no more than the maximum object size or
1279     system resource limits.  Avoid the MIN macro here, as it is not
1280     quite right when only one argument is floating point.  Don't
1281     bother to check for values like RLIM_INFINITY since in practice
1282     they are not much less than SIZE_MAX.  */
1283  size_t size = SIZE_MAX;
1284  if (mem < size)
1285    size = mem;
1286  if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
1287    size = rlimit.rlim_cur;
1288#ifdef RLIMIT_AS
1289  if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
1290    size = rlimit.rlim_cur;
1291#endif
1292
1293  /* Leave a large safety margin for the above limits, as failure can
1294     occur when they are exceeded.  */
1295  size /= 2;
1296
1297#ifdef RLIMIT_RSS
1298  /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
1299     Exceeding RSS is not fatal, but can be quite slow.  */
1300  if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
1301    size = rlimit.rlim_cur / 16 * 15;
1302#endif
1303
1304  /* Use no less than the minimum.  */
1305  return MAX (size, MIN_SORT_SIZE);
1306}
1307
1308/* Return the sort buffer size to use with the input files identified
1309   by FPS and FILES, which are alternate names of the same files.
1310   NFILES gives the number of input files; NFPS may be less.  Assume
1311   that each input line requires LINE_BYTES extra bytes' worth of line
1312   information.  Do not exceed the size bound specified by the user
1313   (or a default size bound, if the user does not specify one).  */
1314
1315static size_t
1316sort_buffer_size (FILE *const *fps, size_t nfps,
1317                  char *const *files, size_t nfiles,
1318                  size_t line_bytes)
1319{
1320  /* A bound on the input size.  If zero, the bound hasn't been
1321     determined yet.  */
1322  static size_t size_bound;
1323
1324  /* In the worst case, each input byte is a newline.  */
1325  size_t worst_case_per_input_byte = line_bytes + 1;
1326
1327  /* Keep enough room for one extra input line and an extra byte.
1328     This extra room might be needed when preparing to read EOF.  */
1329  size_t size = worst_case_per_input_byte + 1;
1330
1331  size_t i;
1332
1333  for (i = 0; i < nfiles; i++)
1334    {
1335      struct stat st;
1336      off_t file_size;
1337      size_t worst_case;
1338
1339      if ((i < nfps ? fstat (fileno (fps[i]), &st)
1340           : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
1341           : stat (files[i], &st))
1342          != 0)
1343        die (_("stat failed"), files[i]);
1344
1345      if (S_ISREG (st.st_mode))
1346        file_size = st.st_size;
1347      else
1348        {
1349          /* The file has unknown size.  If the user specified a sort
1350             buffer size, use that; otherwise, guess the size.  */
1351          if (sort_size)
1352            return sort_size;
1353          file_size = INPUT_FILE_SIZE_GUESS;
1354        }
1355
1356      if (! size_bound)
1357        {
1358          size_bound = sort_size;
1359          if (! size_bound)
1360            size_bound = default_sort_size ();
1361        }
1362
1363      /* Add the amount of memory needed to represent the worst case
1364         where the input consists entirely of newlines followed by a
1365         single non-newline.  Check for overflow.  */
1366      worst_case = file_size * worst_case_per_input_byte + 1;
1367      if (file_size != worst_case / worst_case_per_input_byte
1368          || size_bound - size <= worst_case)
1369        return size_bound;
1370      size += worst_case;
1371    }
1372
1373  return size;
1374}
1375
1376/* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
1377   must be at least sizeof (struct line).  Allocate ALLOC bytes
1378   initially.  */
1379
1380static void
1381initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
1382{
1383  /* Ensure that the line array is properly aligned.  If the desired
1384     size cannot be allocated, repeatedly halve it until allocation
1385     succeeds.  The smaller allocation may hurt overall performance,
1386     but that's better than failing.  */
1387  for (;;)
1388    {
1389      alloc += sizeof (struct line) - alloc % sizeof (struct line);
1390      buf->buf = malloc (alloc);
1391      if (buf->buf)
1392        break;
1393      alloc /= 2;
1394      if (alloc <= line_bytes + 1)
1395        xalloc_die ();
1396    }
1397
1398  buf->line_bytes = line_bytes;
1399  buf->alloc = alloc;
1400  buf->used = buf->left = buf->nlines = 0;
1401  buf->eof = false;
1402}
1403
1404/* Return one past the limit of the line array.  */
1405
1406static inline struct line *
1407buffer_linelim (struct buffer const *buf)
1408{
1409  return (struct line *) (buf->buf + buf->alloc);
1410}
1411
1412/* Return a pointer to the first character of the field specified
1413   by KEY in LINE. */
1414
1415static char *
1416begfield (const struct line *line, const struct keyfield *key)
1417{
1418  char *ptr = line->text, *lim = ptr + line->length - 1;
1419  size_t sword = key->sword;
1420  size_t schar = key->schar;
1421
1422  /* The leading field separator itself is included in a field when -t
1423     is absent.  */
1424
1425  if (tab != TAB_DEFAULT)
1426    while (ptr < lim && sword--)
1427      {
1428        while (ptr < lim && *ptr != tab)
1429          ++ptr;
1430        if (ptr < lim)
1431          ++ptr;
1432      }
1433  else
1434    while (ptr < lim && sword--)
1435      {
1436        while (ptr < lim && blanks[to_uchar (*ptr)])
1437          ++ptr;
1438        while (ptr < lim && !blanks[to_uchar (*ptr)])
1439          ++ptr;
1440      }
1441
1442  /* If we're ignoring leading blanks when computing the Start
1443     of the field, skip past them here.  */
1444  if (key->skipsblanks)
1445    while (ptr < lim && blanks[to_uchar (*ptr)])
1446      ++ptr;
1447
1448  /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
1449  ptr = MIN (lim, ptr + schar);
1450
1451  return ptr;
1452}
1453
1454/* Return the limit of (a pointer to the first character after) the field
1455   in LINE specified by KEY. */
1456
1457static char *
1458limfield (const struct line *line, const struct keyfield *key)
1459{
1460  char *ptr = line->text, *lim = ptr + line->length - 1;
1461  size_t eword = key->eword, echar = key->echar;
1462
1463  if (echar == 0)
1464    eword++; /* Skip all of end field.  */
1465
1466  /* Move PTR past EWORD fields or to one past the last byte on LINE,
1467     whichever comes first.  If there are more than EWORD fields, leave
1468     PTR pointing at the beginning of the field having zero-based index,
1469     EWORD.  If a delimiter character was specified (via -t), then that
1470     `beginning' is the first character following the delimiting TAB.
1471     Otherwise, leave PTR pointing at the first `blank' character after
1472     the preceding field.  */
1473  if (tab != TAB_DEFAULT)
1474    while (ptr < lim && eword--)
1475      {
1476        while (ptr < lim && *ptr != tab)
1477          ++ptr;
1478        if (ptr < lim && (eword || echar))
1479          ++ptr;
1480      }
1481  else
1482    while (ptr < lim && eword--)
1483      {
1484        while (ptr < lim && blanks[to_uchar (*ptr)])
1485          ++ptr;
1486        while (ptr < lim && !blanks[to_uchar (*ptr)])
1487          ++ptr;
1488      }
1489
1490#ifdef POSIX_UNSPECIFIED
1491  /* The following block of code makes GNU sort incompatible with
1492     standard Unix sort, so it's ifdef'd out for now.
1493     The POSIX spec isn't clear on how to interpret this.
1494     FIXME: request clarification.
1495
1496     From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1497     Date: Thu, 30 May 96 12:20:41 -0400
1498     [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1499
1500     [...]I believe I've found another bug in `sort'.
1501
1502     $ cat /tmp/sort.in
1503     a b c 2 d
1504     pq rs 1 t
1505     $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1506     a b c 2 d
1507     pq rs 1 t
1508     $ /bin/sort -k1.7,1.7 </tmp/sort.in
1509     pq rs 1 t
1510     a b c 2 d
1511
1512     Unix sort produced the answer I expected: sort on the single character
1513     in column 7.  GNU sort produced different results, because it disagrees
1514     on the interpretation of the key-end spec "M.N".  Unix sort reads this
1515     as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1516     "skip M-1 fields, then either N-1 characters or the rest of the current
1517     field, whichever comes first".  This extra clause applies only to
1518     key-ends, not key-starts.
1519     */
1520
1521  /* Make LIM point to the end of (one byte past) the current field.  */
1522  if (tab != TAB_DEFAULT)
1523    {
1524      char *newlim;
1525      newlim = memchr (ptr, tab, lim - ptr);
1526      if (newlim)
1527        lim = newlim;
1528    }
1529  else
1530    {
1531      char *newlim;
1532      newlim = ptr;
1533      while (newlim < lim && blanks[to_uchar (*newlim)])
1534        ++newlim;
1535      while (newlim < lim && !blanks[to_uchar (*newlim)])
1536        ++newlim;
1537      lim = newlim;
1538    }
1539#endif
1540
1541  if (echar != 0) /* We need to skip over a portion of the end field.  */
1542    {
1543      /* If we're ignoring leading blanks when computing the End
1544         of the field, skip past them here.  */
1545      if (key->skipeblanks)
1546        while (ptr < lim && blanks[to_uchar (*ptr)])
1547          ++ptr;
1548
1549      /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1550      ptr = MIN (lim, ptr + echar);
1551    }
1552
1553  return ptr;
1554}
1555
1556/* Fill BUF reading from FP, moving buf->left bytes from the end
1557   of buf->buf to the beginning first.  If EOF is reached and the
1558   file wasn't terminated by a newline, supply one.  Set up BUF's line
1559   table too.  FILE is the name of the file corresponding to FP.
1560   Return true if some input was read.  */
1561
1562static bool
1563fillbuf (struct buffer *buf, FILE *fp, char const *file)
1564{
1565  struct keyfield const *key = keylist;
1566  char eol = eolchar;
1567  size_t line_bytes = buf->line_bytes;
1568  size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1569
1570  if (buf->eof)
1571    return false;
1572
1573  if (buf->used != buf->left)
1574    {
1575      memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1576      buf->used = buf->left;
1577      buf->nlines = 0;
1578    }
1579
1580  for (;;)
1581    {
1582      char *ptr = buf->buf + buf->used;
1583      struct line *linelim = buffer_linelim (buf);
1584      struct line *line = linelim - buf->nlines;
1585      size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1586      char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1587
1588      while (line_bytes + 1 < avail)
1589        {
1590          /* Read as many bytes as possible, but do not read so many
1591             bytes that there might not be enough room for the
1592             corresponding line array.  The worst case is when the
1593             rest of the input file consists entirely of newlines,
1594             except that the last byte is not a newline.  */
1595          size_t readsize = (avail - 1) / (line_bytes + 1);
1596          size_t bytes_read = fread (ptr, 1, readsize, fp);
1597          char *ptrlim = ptr + bytes_read;
1598          char *p;
1599          avail -= bytes_read;
1600
1601          if (bytes_read != readsize)
1602            {
1603              if (ferror (fp))
1604                die (_("read failed"), file);
1605              if (feof (fp))
1606                {
1607                  buf->eof = true;
1608                  if (buf->buf == ptrlim)
1609                    return false;
1610                  if (ptrlim[-1] != eol)
1611                    *ptrlim++ = eol;
1612                }
1613            }
1614
1615          /* Find and record each line in the just-read input.  */
1616          while ((p = memchr (ptr, eol, ptrlim - ptr)))
1617            {
1618              ptr = p + 1;
1619              line--;
1620              line->text = line_start;
1621              line->length = ptr - line_start;
1622              mergesize = MAX (mergesize, line->length);
1623              avail -= line_bytes;
1624
1625              if (key)
1626                {
1627                  /* Precompute the position of the first key for
1628                     efficiency.  */
1629                  line->keylim = (key->eword == SIZE_MAX
1630                                  ? p
1631                                  : limfield (line, key));
1632
1633                  if (key->sword != SIZE_MAX)
1634                    line->keybeg = begfield (line, key);
1635                  else
1636                    {
1637                      if (key->skipsblanks)
1638                        while (blanks[to_uchar (*line_start)])
1639                          line_start++;
1640                      line->keybeg = line_start;
1641                    }
1642                }
1643
1644              line_start = ptr;
1645            }
1646
1647          ptr = ptrlim;
1648          if (buf->eof)
1649            break;
1650        }
1651
1652      buf->used = ptr - buf->buf;
1653      buf->nlines = buffer_linelim (buf) - line;
1654      if (buf->nlines != 0)
1655        {
1656          buf->left = ptr - line_start;
1657          merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1658          return true;
1659        }
1660
1661      {
1662        /* The current input line is too long to fit in the buffer.
1663           Double the buffer size and try again, keeping it properly
1664           aligned.  */
1665        size_t line_alloc = buf->alloc / sizeof (struct line);
1666        buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
1667        buf->alloc = line_alloc * sizeof (struct line);
1668      }
1669    }
1670}
1671
1672/* Compare strings A and B as numbers without explicitly converting them to
1673   machine numbers.  Comparatively slow for short strings, but asymptotically
1674   hideously fast. */
1675
1676static int
1677numcompare (const char *a, const char *b)
1678{
1679  while (blanks[to_uchar (*a)])
1680    a++;
1681  while (blanks[to_uchar (*b)])
1682    b++;
1683
1684  return strnumcmp (a, b, decimal_point, thousands_sep);
1685}
1686
1687/* Exit with an error if a mixture of SI and IEC units detected.  */
1688
1689static void
1690check_mixed_SI_IEC (char prefix, struct keyfield *key)
1691{
1692  int si_present = prefix == 'i';
1693  if (key->si_present != -1 && si_present != key->si_present)
1694    error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
1695  key->si_present = si_present;
1696}
1697
1698/* Return an integer which represents the order of magnitude of
1699   the unit following the number.  NUMBER can contain thousands separators
1700   or a decimal point, but not have preceeding blanks.
1701   Negative numbers return a negative unit order.  */
1702
1703static int
1704find_unit_order (const char *number, struct keyfield *key)
1705{
1706  static const char orders [UCHAR_LIM] =
1707    {
1708#if SOME_DAY_WE_WILL_REQUIRE_C99
1709      ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
1710      ['k']=1,
1711#else
1712      /* Generate the following table with this command:
1713         perl -e 'my %a=(k=>1, K=>1, M=>2, G=>3, T=>4, P=>5, E=>6, Z=>7, Y=>8);
1714         foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\
1715         |fmt  */
1716      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1717      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1718      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3,
1719      0, 0, 0, 1, 0, 2, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0,
1720      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1721      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1722      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1723      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1724      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1725      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1726      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1727#endif
1728    };
1729
1730  const unsigned char *p = number;
1731
1732  int sign = 1;
1733
1734  if (*p == '-')
1735    {
1736      sign = -1;
1737      p++;
1738    }
1739
1740  /* Scan to end of number.
1741     Decimals or separators not followed by digits stop the scan.
1742     Numbers ending in decimals or separators are thus considered
1743     to be lacking in units.
1744     FIXME: add support for multibyte thousands_sep and decimal_point.  */
1745
1746  while (ISDIGIT (*p))
1747    {
1748      p++;
1749
1750      if (*p == decimal_point && ISDIGIT (*(p + 1)))
1751        p += 2;
1752      else if (*p == thousands_sep && ISDIGIT (*(p + 1)))
1753        p += 2;
1754    }
1755
1756  {
1757  int order = orders[*p];
1758
1759  /* For valid units check for MiB vs MB etc.  */
1760  if (order)
1761    check_mixed_SI_IEC (*(p + 1), key);
1762
1763  return sign * order;
1764  }
1765}
1766
1767/* Compare numbers ending in units with SI xor IEC prefixes
1768       <none/unknown> < K/k < M < G < T < P < E < Z < Y
1769   Assume that numbers are properly abbreviated.
1770   i.e. input will never have both 6000K and 5M.  */
1771
1772static int
1773human_numcompare (const char *a, const char *b, struct keyfield *key)
1774{
1775  while (blanks[to_uchar (*a)])
1776    a++;
1777  while (blanks[to_uchar (*b)])
1778    b++;
1779
1780  {
1781  int order_a = find_unit_order (a, key);
1782  int order_b = find_unit_order (b, key);
1783
1784  return (order_a > order_b ? 1
1785          : order_a < order_b ? -1
1786          : strnumcmp (a, b, decimal_point, thousands_sep));
1787  }
1788}
1789
1790static int
1791general_numcompare (const char *sa, const char *sb)
1792{
1793  /* FIXME: add option to warn about failed conversions.  */
1794  /* FIXME: maybe add option to try expensive FP conversion
1795     only if A and B can't be compared more cheaply/accurately.  */
1796
1797  char *ea;
1798  char *eb;
1799  double a = strtod (sa, &ea);
1800  double b = strtod (sb, &eb);
1801
1802  /* Put conversion errors at the start of the collating sequence.  */
1803  if (sa == ea)
1804    return sb == eb ? 0 : -1;
1805  if (sb == eb)
1806    return 1;
1807
1808  /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1809     conversion errors but before numbers; sort them by internal
1810     bit-pattern, for lack of a more portable alternative.  */
1811  return (a < b ? -1
1812          : a > b ? 1
1813          : a == b ? 0
1814          : b == b ? -1
1815          : a == a ? 1
1816          : memcmp ((char *) &a, (char *) &b, sizeof a));
1817}
1818
1819/* Return an integer in 1..12 of the month name MONTH with length LEN.
1820   Return 0 if the name in S is not recognized.  */
1821
1822static int
1823getmonth (char const *month, size_t len)
1824{
1825  size_t lo = 0;
1826  size_t hi = MONTHS_PER_YEAR;
1827  char const *monthlim = month + len;
1828
1829  for (;;)
1830    {
1831      if (month == monthlim)
1832        return 0;
1833      if (!blanks[to_uchar (*month)])
1834        break;
1835      ++month;
1836    }
1837
1838  do
1839    {
1840      size_t ix = (lo + hi) / 2;
1841      char const *m = month;
1842      char const *n = monthtab[ix].name;
1843
1844      for (;; m++, n++)
1845        {
1846          if (!*n)
1847            return monthtab[ix].val;
1848          if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1849            {
1850              hi = ix;
1851              break;
1852            }
1853          else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1854            {
1855              lo = ix + 1;
1856              break;
1857            }
1858        }
1859    }
1860  while (lo < hi);
1861
1862  return 0;
1863}
1864
1865/* A source of random data.  */
1866static struct randread_source *randread_source;
1867
1868/* Return the Ith randomly-generated state.  The caller must invoke
1869   random_state (H) for all H less than I before invoking random_state
1870   (I).  */
1871
1872static struct md5_ctx
1873random_state (size_t i)
1874{
1875  /* An array of states resulting from the random data, and counts of
1876     its used and allocated members.  */
1877  static struct md5_ctx *state;
1878  static size_t used;
1879  static size_t allocated;
1880
1881  struct md5_ctx *s = &state[i];
1882
1883  if (used <= i)
1884    {
1885      unsigned char buf[MD5_DIGEST_SIZE];
1886
1887      used++;
1888
1889      if (allocated <= i)
1890        {
1891          state = X2NREALLOC (state, &allocated);
1892          s = &state[i];
1893        }
1894
1895      randread (randread_source, buf, sizeof buf);
1896      md5_init_ctx (s);
1897      md5_process_bytes (buf, sizeof buf, s);
1898    }
1899
1900  return *s;
1901}
1902
1903/* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
1904   with length LENGTHB.  Return negative if less, zero if equal,
1905   positive if greater.  */
1906
1907static int
1908cmp_hashes (char const *texta, size_t lena,
1909            char const *textb, size_t lenb)
1910{
1911  /* Try random hashes until a pair of hashes disagree.  But if the
1912     first pair of random hashes agree, check whether the keys are
1913     identical and if so report no difference.  */
1914  int diff;
1915  size_t i;
1916  for (i = 0; ; i++)
1917    {
1918      uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
1919      struct md5_ctx s[2];
1920      s[0] = s[1] = random_state (i);
1921      md5_process_bytes (texta, lena, &s[0]);  md5_finish_ctx (&s[0], dig[0]);
1922      md5_process_bytes (textb, lenb, &s[1]);  md5_finish_ctx (&s[1], dig[1]);
1923      diff = memcmp (dig[0], dig[1], sizeof dig[0]);
1924      if (diff != 0)
1925        break;
1926      if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
1927        break;
1928    }
1929
1930  return diff;
1931}
1932
1933/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1934   using one or more random hash functions.  */
1935
1936static int
1937compare_random (char *restrict texta, size_t lena,
1938                char *restrict textb, size_t lenb)
1939{
1940  int diff;
1941
1942  if (! hard_LC_COLLATE)
1943    diff = cmp_hashes (texta, lena, textb, lenb);
1944  else
1945    {
1946      /* Transform the text into the basis of comparison, so that byte
1947         strings that would otherwise considered to be equal are
1948         considered equal here even if their bytes differ.  */
1949
1950      char *buf = NULL;
1951      char stackbuf[4000];
1952      size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
1953      bool a_fits = tlena <= sizeof stackbuf;
1954      size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
1955                               (a_fits ? sizeof stackbuf - tlena : 0),
1956                               textb, lenb);
1957
1958      if (a_fits && tlena + tlenb <= sizeof stackbuf)
1959        buf = stackbuf;
1960      else
1961        {
1962          /* Adding 1 to the buffer size lets xmemxfrm run a bit
1963             faster by avoiding the need for an extra buffer copy.  */
1964          buf = xmalloc (tlena + tlenb + 1);
1965          xmemxfrm (buf, tlena + 1, texta, lena);
1966          xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
1967        }
1968
1969      diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
1970
1971      if (buf != stackbuf)
1972        free (buf);
1973    }
1974
1975  return diff;
1976}
1977
1978/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
1979   using filevercmp. See lib/filevercmp.h for function description. */
1980
1981static int
1982compare_version (char *restrict texta, size_t lena,
1983                 char *restrict textb, size_t lenb)
1984{
1985  int diff;
1986
1987  /* It is necessary to save the character after the end of the field.
1988     "filevercmp" works with NUL terminated strings.  Our blocks of
1989     text are not necessarily terminated with a NUL byte. */
1990  char sv_a = texta[lena];
1991  char sv_b = textb[lenb];
1992
1993  texta[lena] = '\0';
1994  textb[lenb] = '\0';
1995
1996  diff = filevercmp (texta, textb);
1997
1998  texta[lena] = sv_a;
1999  textb[lenb] = sv_b;
2000
2001  return diff;
2002}
2003
2004/* Compare two lines A and B trying every key in sequence until there
2005   are no more keys or a difference is found. */
2006
2007static int
2008keycompare (const struct line *a, const struct line *b)
2009{
2010  struct keyfield *key = keylist;
2011
2012  /* For the first iteration only, the key positions have been
2013     precomputed for us. */
2014  char *texta = a->keybeg;
2015  char *textb = b->keybeg;
2016  char *lima = a->keylim;
2017  char *limb = b->keylim;
2018
2019  int diff;
2020
2021  for (;;)
2022    {
2023      char const *translate = key->translate;
2024      bool const *ignore = key->ignore;
2025
2026      /* Treat field ends before field starts as empty fields.  */
2027      lima = MAX (texta, lima);
2028      limb = MAX (textb, limb);
2029
2030      {
2031      /* Find the lengths. */
2032      size_t lena = lima - texta;
2033      size_t lenb = limb - textb;
2034
2035      /* Actually compare the fields. */
2036
2037      if (key->random)
2038        diff = compare_random (texta, lena, textb, lenb);
2039      else if (key->numeric || key->general_numeric || key->human_numeric)
2040        {
2041          char savea = *lima, saveb = *limb;
2042
2043          *lima = *limb = '\0';
2044          diff = (key->numeric ? numcompare (texta, textb)
2045                  : key->general_numeric ? general_numcompare (texta, textb)
2046                  : human_numcompare (texta, textb, key));
2047          *lima = savea, *limb = saveb;
2048        }
2049      else if (key->version)
2050        diff = compare_version (texta, lena, textb, lenb);
2051      else if (key->month)
2052        diff = getmonth (texta, lena) - getmonth (textb, lenb);
2053      /* Sorting like this may become slow, so in a simple locale the user
2054         can select a faster sort that is similar to ascii sort.  */
2055      else if (hard_LC_COLLATE)
2056        {
2057          if (ignore || translate)
2058            {
2059              char buf[4000];
2060              size_t size = lena + 1 + lenb + 1;
2061              char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
2062              char *copy_b = copy_a + lena + 1;
2063              size_t new_len_a, new_len_b, i;
2064
2065              /* Ignore and/or translate chars before comparing.  */
2066              for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
2067                {
2068                  if (i < lena)
2069                    {
2070                      copy_a[new_len_a] = (translate
2071                                           ? translate[to_uchar (texta[i])]
2072                                           : texta[i]);
2073                      if (!ignore || !ignore[to_uchar (texta[i])])
2074                        ++new_len_a;
2075                    }
2076                  if (i < lenb)
2077                    {
2078                      copy_b[new_len_b] = (translate
2079                                           ? translate[to_uchar (textb[i])]
2080                                           : textb [i]);
2081                      if (!ignore || !ignore[to_uchar (textb[i])])
2082                        ++new_len_b;
2083                    }
2084                }
2085
2086              diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
2087
2088              if (sizeof buf < size)
2089                free (copy_a);
2090            }
2091          else if (lena == 0)
2092            diff = - NONZERO (lenb);
2093          else if (lenb == 0)
2094            goto greater;
2095          else
2096            diff = xmemcoll (texta, lena, textb, lenb);
2097        }
2098      else if (ignore)
2099        {
2100#define CMP_WITH_IGNORE(A, B)						\
2101  do									\
2102    {									\
2103          for (;;)							\
2104            {								\
2105              while (texta < lima && ignore[to_uchar (*texta)])		\
2106                ++texta;						\
2107              while (textb < limb && ignore[to_uchar (*textb)])		\
2108                ++textb;						\
2109              if (! (texta < lima && textb < limb))			\
2110                break;							\
2111              diff = to_uchar (A) - to_uchar (B);			\
2112              if (diff)							\
2113                goto not_equal;						\
2114              ++texta;							\
2115              ++textb;							\
2116            }								\
2117                                                                        \
2118          diff = (texta < lima) - (textb < limb);			\
2119    }									\
2120  while (0)
2121
2122          if (translate)
2123            CMP_WITH_IGNORE (translate[to_uchar (*texta)],
2124                             translate[to_uchar (*textb)]);
2125          else
2126            CMP_WITH_IGNORE (*texta, *textb);
2127        }
2128      else if (lena == 0)
2129        diff = - NONZERO (lenb);
2130      else if (lenb == 0)
2131        goto greater;
2132      else
2133        {
2134          if (translate)
2135            {
2136              while (texta < lima && textb < limb)
2137                {
2138                  diff = (to_uchar (translate[to_uchar (*texta++)])
2139                          - to_uchar (translate[to_uchar (*textb++)]));
2140                  if (diff)
2141                    goto not_equal;
2142                }
2143            }
2144          else
2145            {
2146              diff = memcmp (texta, textb, MIN (lena, lenb));
2147              if (diff)
2148                goto not_equal;
2149            }
2150          diff = lena < lenb ? -1 : lena != lenb;
2151        }
2152
2153      if (diff)
2154        goto not_equal;
2155
2156      key = key->next;
2157      if (! key)
2158        break;
2159
2160      /* Find the beginning and limit of the next field.  */
2161      if (key->eword != SIZE_MAX)
2162        lima = limfield (a, key), limb = limfield (b, key);
2163      else
2164        lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2165
2166      if (key->sword != SIZE_MAX)
2167        texta = begfield (a, key), textb = begfield (b, key);
2168      else
2169        {
2170          texta = a->text, textb = b->text;
2171          if (key->skipsblanks)
2172            {
2173              while (texta < lima && blanks[to_uchar (*texta)])
2174                ++texta;
2175              while (textb < limb && blanks[to_uchar (*textb)])
2176                ++textb;
2177            }
2178        }
2179      }
2180    }
2181
2182  return 0;
2183
2184 greater:
2185  diff = 1;
2186 not_equal:
2187  return key->reverse ? -diff : diff;
2188}
2189
2190/* Compare two lines A and B, returning negative, zero, or positive
2191   depending on whether A compares less than, equal to, or greater than B. */
2192
2193static int
2194compare (const struct line *a, const struct line *b)
2195{
2196  int diff;
2197  size_t alen, blen;
2198
2199  /* First try to compare on the specified keys (if any).
2200     The only two cases with no key at all are unadorned sort,
2201     and unadorned sort -r. */
2202  if (keylist)
2203    {
2204      diff = keycompare (a, b);
2205      if (diff || unique || stable)
2206        return diff;
2207    }
2208
2209  /* If the keys all compare equal (or no keys were specified)
2210     fall through to the default comparison.  */
2211  alen = a->length - 1, blen = b->length - 1;
2212
2213  if (alen == 0)
2214    diff = - NONZERO (blen);
2215  else if (blen == 0)
2216    diff = 1;
2217  else if (hard_LC_COLLATE)
2218    diff = xmemcoll (a->text, alen, b->text, blen);
2219  else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2220    diff = alen < blen ? -1 : alen != blen;
2221
2222  return reverse ? -diff : diff;
2223}
2224
2225/* Check that the lines read from FILE_NAME come in order.  Return
2226   true if they are in order.  If CHECKONLY == 'c', also print a
2227   diagnostic (FILE_NAME, line number, contents of line) to stderr if
2228   they are not in order.  */
2229
2230static bool
2231check (char const *file_name, char checkonly)
2232{
2233  FILE *fp = xfopen (file_name, "r");
2234  struct buffer buf;		/* Input buffer. */
2235  struct line temp;		/* Copy of previous line. */
2236  size_t alloc = 0;
2237  uintmax_t line_number = 0;
2238  struct keyfield const *key = keylist;
2239  bool nonunique = ! unique;
2240  bool ordered = true;
2241
2242  initbuf (&buf, sizeof (struct line),
2243           MAX (merge_buffer_size, sort_size));
2244  temp.text = NULL;
2245
2246  while (fillbuf (&buf, fp, file_name))
2247    {
2248      struct line const *line = buffer_linelim (&buf);
2249      struct line const *linebase = line - buf.nlines;
2250
2251      /* Make sure the line saved from the old buffer contents is
2252         less than or equal to the first line of the new buffer. */
2253      if (alloc && nonunique <= compare (&temp, line - 1))
2254        {
2255        found_disorder:
2256          {
2257            if (checkonly == 'c')
2258              {
2259                struct line const *disorder_line = line - 1;
2260                uintmax_t disorder_line_number =
2261                  buffer_linelim (&buf) - disorder_line + line_number;
2262                char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2263                fprintf (stderr, _("%s: %s:%s: disorder: "),
2264                         program_name, file_name,
2265                         umaxtostr (disorder_line_number, hr_buf));
2266                write_bytes (disorder_line->text, disorder_line->length,
2267                             stderr, _("standard error"));
2268              }
2269
2270            ordered = false;
2271            break;
2272          }
2273        }
2274
2275      /* Compare each line in the buffer with its successor.  */
2276      while (linebase < --line)
2277        if (nonunique <= compare (line, line - 1))
2278          goto found_disorder;
2279
2280      line_number += buf.nlines;
2281
2282      /* Save the last line of the buffer.  */
2283      if (alloc < line->length)
2284        {
2285          do
2286            {
2287              alloc *= 2;
2288              if (! alloc)
2289                {
2290                  alloc = line->length;
2291                  break;
2292                }
2293            }
2294          while (alloc < line->length);
2295
2296          temp.text = xrealloc (temp.text, alloc);
2297        }
2298      memcpy (temp.text, line->text, line->length);
2299      temp.length = line->length;
2300      if (key)
2301        {
2302          temp.keybeg = temp.text + (line->keybeg - line->text);
2303          temp.keylim = temp.text + (line->keylim - line->text);
2304        }
2305    }
2306
2307  xfclose (fp, file_name);
2308  free (buf.buf);
2309  free (temp.text);
2310  return ordered;
2311}
2312
2313/* Open FILES (there are NFILES of them) and store the resulting array
2314   of stream pointers into (*PFPS).  Allocate the array.  Return the
2315   number of successfully opened files, setting errno if this value is
2316   less than NFILES.  */
2317
2318static size_t
2319open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps)
2320{
2321  FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps);
2322  int i;
2323
2324  /* Open as many input files as we can.  */
2325  for (i = 0; i < nfiles; i++)
2326    {
2327      fps[i] = (files[i].pid
2328                ? open_temp (files[i].name, files[i].pid)
2329                : stream_open (files[i].name, "r"));
2330      if (!fps[i])
2331        break;
2332    }
2333
2334  return i;
2335}
2336
2337/* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2338   files (all of which are at the start of the FILES array), and
2339   NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2340   FPS is the vector of open stream corresponding to the files.
2341   Close input and output streams before returning.
2342   OUTPUT_FILE gives the name of the output file.  If it is NULL,
2343   the output file is standard output.  */
2344
2345static void
2346mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
2347          FILE *ofp, char const *output_file, FILE **fps)
2348{
2349  struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer);
2350                                /* Input buffers for each file. */
2351  struct line saved;		/* Saved line storage for unique check. */
2352  struct line const *savedline = NULL;
2353                                /* &saved if there is a saved line. */
2354  size_t savealloc = 0;		/* Size allocated for the saved line. */
2355  struct line const **cur = xnmalloc (nfiles, sizeof *cur);
2356                                /* Current line in each line table. */
2357  struct line const **base = xnmalloc (nfiles, sizeof *base);
2358                                /* Base of each line table.  */
2359  size_t *ord = xnmalloc (nfiles, sizeof *ord);
2360                                /* Table representing a permutation of fps,
2361                                   such that cur[ord[0]] is the smallest line
2362                                   and will be next output. */
2363  size_t i;
2364  size_t j;
2365  size_t t;
2366  struct keyfield const *key = keylist;
2367  saved.text = NULL;
2368
2369  /* Read initial lines from each input file. */
2370  for (i = 0; i < nfiles; )
2371    {
2372      initbuf (&buffer[i], sizeof (struct line),
2373               MAX (merge_buffer_size, sort_size / nfiles));
2374      if (fillbuf (&buffer[i], fps[i], files[i].name))
2375        {
2376          struct line const *linelim = buffer_linelim (&buffer[i]);
2377          cur[i] = linelim - 1;
2378          base[i] = linelim - buffer[i].nlines;
2379          i++;
2380        }
2381      else
2382        {
2383          /* fps[i] is empty; eliminate it from future consideration.  */
2384          xfclose (fps[i], files[i].name);
2385          if (i < ntemps)
2386            {
2387              ntemps--;
2388              zaptemp (files[i].name);
2389            }
2390          free (buffer[i].buf);
2391          --nfiles;
2392          for (j = i; j < nfiles; ++j)
2393            {
2394              files[j] = files[j + 1];
2395              fps[j] = fps[j + 1];
2396            }
2397        }
2398    }
2399
2400  /* Set up the ord table according to comparisons among input lines.
2401     Since this only reorders two items if one is strictly greater than
2402     the other, it is stable. */
2403  for (i = 0; i < nfiles; ++i)
2404    ord[i] = i;
2405  for (i = 1; i < nfiles; ++i)
2406    if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2407      t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2408
2409  /* Repeatedly output the smallest line until no input remains. */
2410  while (nfiles)
2411    {
2412      struct line const *smallest = cur[ord[0]];
2413
2414      /* If uniquified output is turned on, output only the first of
2415         an identical series of lines. */
2416      if (unique)
2417        {
2418          if (savedline && compare (savedline, smallest))
2419            {
2420              savedline = NULL;
2421              write_bytes (saved.text, saved.length, ofp, output_file);
2422            }
2423          if (!savedline)
2424            {
2425              savedline = &saved;
2426              if (savealloc < smallest->length)
2427                {
2428                  do
2429                    if (! savealloc)
2430                      {
2431                        savealloc = smallest->length;
2432                        break;
2433                      }
2434                  while ((savealloc *= 2) < smallest->length);
2435
2436                  saved.text = xrealloc (saved.text, savealloc);
2437                }
2438              saved.length = smallest->length;
2439              memcpy (saved.text, smallest->text, saved.length);
2440              if (key)
2441                {
2442                  saved.keybeg =
2443                    saved.text + (smallest->keybeg - smallest->text);
2444                  saved.keylim =
2445                    saved.text + (smallest->keylim - smallest->text);
2446                }
2447            }
2448        }
2449      else
2450        write_bytes (smallest->text, smallest->length, ofp, output_file);
2451
2452      /* Check if we need to read more lines into core. */
2453      if (base[ord[0]] < smallest)
2454        cur[ord[0]] = smallest - 1;
2455      else
2456        {
2457          if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
2458            {
2459              struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2460              cur[ord[0]] = linelim - 1;
2461              base[ord[0]] = linelim - buffer[ord[0]].nlines;
2462            }
2463          else
2464            {
2465              /* We reached EOF on fps[ord[0]].  */
2466              for (i = 1; i < nfiles; ++i)
2467                if (ord[i] > ord[0])
2468                  --ord[i];
2469              --nfiles;
2470              xfclose (fps[ord[0]], files[ord[0]].name);
2471              if (ord[0] < ntemps)
2472                {
2473                  ntemps--;
2474                  zaptemp (files[ord[0]].name);
2475                }
2476              free (buffer[ord[0]].buf);
2477              for (i = ord[0]; i < nfiles; ++i)
2478                {
2479                  fps[i] = fps[i + 1];
2480                  files[i] = files[i + 1];
2481                  buffer[i] = buffer[i + 1];
2482                  cur[i] = cur[i + 1];
2483                  base[i] = base[i + 1];
2484                }
2485              for (i = 0; i < nfiles; ++i)
2486                ord[i] = ord[i + 1];
2487              continue;
2488            }
2489        }
2490
2491      /* The new line just read in may be larger than other lines
2492         already in main memory; push it back in the queue until we
2493         encounter a line larger than it.  Optimize for the common
2494         case where the new line is smallest.  */
2495      {
2496        size_t lo = 1;
2497        size_t hi = nfiles;
2498        size_t probe = lo;
2499        size_t ord0 = ord[0];
2500        size_t count_of_smaller_lines;
2501
2502        while (lo < hi)
2503          {
2504            int cmp = compare (cur[ord0], cur[ord[probe]]);
2505            if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
2506              hi = probe;
2507            else
2508              lo = probe + 1;
2509            probe = (lo + hi) / 2;
2510          }
2511
2512        count_of_smaller_lines = lo - 1;
2513        for (j = 0; j < count_of_smaller_lines; j++)
2514          ord[j] = ord[j + 1];
2515        ord[count_of_smaller_lines] = ord0;
2516      }
2517
2518      /* Free up some resources every once in a while.  */
2519      if (MAX_PROCS_BEFORE_REAP < nprocs)
2520        reap_some ();
2521    }
2522
2523  if (unique && savedline)
2524    {
2525      write_bytes (saved.text, saved.length, ofp, output_file);
2526      free (saved.text);
2527    }
2528
2529  xfclose (ofp, output_file);
2530  free(fps);
2531  free(buffer);
2532  free(ord);
2533  free(base);
2534  free(cur);
2535}
2536
2537/* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
2538   files (all of which are at the start of the FILES array), and
2539   NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
2540   Close input and output files before returning.
2541   OUTPUT_FILE gives the name of the output file.
2542
2543   Return the number of files successfully merged.  This number can be
2544   less than NFILES if we ran low on file descriptors, but in this
2545   case it is never less than 2.  */
2546
2547static size_t
2548mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles,
2549            FILE *ofp, char const *output_file)
2550{
2551  FILE **fps;
2552  size_t nopened = open_input_files (files, nfiles, &fps);
2553  if (nopened < nfiles && nopened < 2)
2554    die (_("open failed"), files[nopened].name);
2555  mergefps (files, ntemps, nopened, ofp, output_file, fps);
2556  return nopened;
2557}
2558
2559/* Merge into T the two sorted arrays of lines LO (with NLO members)
2560   and HI (with NHI members).  T, LO, and HI point just past their
2561   respective arrays, and the arrays are in reverse order.  NLO and
2562   NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
2563
2564static inline void
2565mergelines (struct line *t,
2566            struct line const *lo, size_t nlo,
2567            struct line const *hi, size_t nhi)
2568{
2569  for (;;)
2570    if (compare (lo - 1, hi - 1) <= 0)
2571      {
2572        *--t = *--lo;
2573        if (! --nlo)
2574          {
2575            /* HI - NHI equalled T - (NLO + NHI) when this function
2576               began.  Therefore HI must equal T now, and there is no
2577               need to copy from HI to T.  */
2578            return;
2579          }
2580      }
2581    else
2582      {
2583        *--t = *--hi;
2584        if (! --nhi)
2585          {
2586            do
2587              *--t = *--lo;
2588            while (--nlo);
2589
2590            return;
2591          }
2592      }
2593}
2594
2595/* Sort the array LINES with NLINES members, using TEMP for temporary space.
2596   NLINES must be at least 2.
2597   The input and output arrays are in reverse order, and LINES and
2598   TEMP point just past the end of their respective arrays.
2599
2600   Use a recursive divide-and-conquer algorithm, in the style
2601   suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
2602   the optimization suggested by exercise 5.2.4-10; this requires room
2603   for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
2604   writes that this memory optimization was originally published by
2605   D. A. Bell, Comp J. 1 (1958), 75.  */
2606
2607static void
2608sortlines (struct line *lines, size_t nlines, struct line *temp)
2609{
2610  if (nlines == 2)
2611    {
2612      if (0 < compare (&lines[-1], &lines[-2]))
2613        {
2614          struct line tmp = lines[-1];
2615          lines[-1] = lines[-2];
2616          lines[-2] = tmp;
2617        }
2618    }
2619  else
2620    {
2621      size_t nlo = nlines / 2;
2622      size_t nhi = nlines - nlo;
2623      struct line *lo = lines;
2624      struct line *hi = lines - nlo;
2625      struct line *sorted_lo = temp;
2626
2627      sortlines (hi, nhi, temp);
2628      if (1 < nlo)
2629        sortlines_temp (lo, nlo, sorted_lo);
2630      else
2631        sorted_lo[-1] = lo[-1];
2632
2633      mergelines (lines, sorted_lo, nlo, hi, nhi);
2634    }
2635}
2636
2637/* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2638   rather than sorting in place.  */
2639
2640static void
2641sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2642{
2643  if (nlines == 2)
2644    {
2645      /* Declare `swap' as int, not bool, to work around a bug
2646         <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
2647         in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
2648      int swap = (0 < compare (&lines[-1], &lines[-2]));
2649      temp[-1] = lines[-1 - swap];
2650      temp[-2] = lines[-2 + swap];
2651    }
2652  else
2653    {
2654      size_t nlo = nlines / 2;
2655      size_t nhi = nlines - nlo;
2656      struct line *lo = lines;
2657      struct line *hi = lines - nlo;
2658      struct line *sorted_hi = temp - nlo;
2659
2660      sortlines_temp (hi, nhi, sorted_hi);
2661      if (1 < nlo)
2662        sortlines (lo, nlo, temp);
2663
2664      mergelines (temp, lo, nlo, sorted_hi, nhi);
2665    }
2666}
2667
2668/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
2669   the same as OUTFILE.  If found, merge the found instances (and perhaps
2670   some other files) into a temporary file so that it can in turn be
2671   merged into OUTFILE without destroying OUTFILE before it is completely
2672   read.  Return the new value of NFILES, which differs from the old if
2673   some merging occurred.
2674
2675   This test ensures that an otherwise-erroneous use like
2676   "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2677   It's not clear that POSIX requires this nicety.
2678   Detect common error cases, but don't try to catch obscure cases like
2679   "cat ... FILE ... | sort -m -o FILE"
2680   where traditional "sort" doesn't copy the input and where
2681   people should know that they're getting into trouble anyway.
2682   Catching these obscure cases would slow down performance in
2683   common cases.  */
2684
2685static size_t
2686avoid_trashing_input (struct sortfile *files, size_t ntemps,
2687                      size_t nfiles, char const *outfile)
2688{
2689  size_t i;
2690  bool got_outstat = false;
2691  struct stat outstat;
2692
2693  for (i = ntemps; i < nfiles; i++)
2694    {
2695      bool is_stdin = STREQ (files[i].name, "-");
2696      bool same;
2697      struct stat instat;
2698
2699      if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
2700        same = true;
2701      else
2702        {
2703          if (! got_outstat)
2704            {
2705              if ((outfile
2706                   ? stat (outfile, &outstat)
2707                   : fstat (STDOUT_FILENO, &outstat))
2708                  != 0)
2709                break;
2710              got_outstat = true;
2711            }
2712
2713          same = (((is_stdin
2714                    ? fstat (STDIN_FILENO, &instat)
2715                    : stat (files[i].name, &instat))
2716                   == 0)
2717                  && SAME_INODE (instat, outstat));
2718        }
2719
2720      if (same)
2721        {
2722          FILE *tftp;
2723          pid_t pid;
2724          char *temp = create_temp (&tftp, &pid);
2725          size_t num_merged = 0;
2726          do
2727            {
2728              num_merged += mergefiles (&files[i], 0, nfiles - i, tftp, temp);
2729              files[i].name = temp;
2730              files[i].pid = pid;
2731
2732              if (i + num_merged < nfiles)
2733                memmove(&files[i + 1], &files[i + num_merged],
2734                        num_merged * sizeof *files);
2735              ntemps += 1;
2736              nfiles -= num_merged - 1;;
2737              i += num_merged;
2738            }
2739          while (i < nfiles);
2740        }
2741    }
2742
2743  return nfiles;
2744}
2745
2746/* Merge the input FILES.  NTEMPS is the number of files at the
2747   start of FILES that are temporary; it is zero at the top level.
2748   NFILES is the total number of files.  Put the output in
2749   OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
2750
2751static void
2752merge (struct sortfile *files, size_t ntemps, size_t nfiles,
2753       char const *output_file)
2754{
2755  while (nmerge < nfiles)
2756    {
2757      /* Number of input files processed so far.  */
2758      size_t in;
2759
2760      /* Number of output files generated so far.  */
2761      size_t out;
2762
2763      /* nfiles % NMERGE; this counts input files that are left over
2764         after all full-sized merges have been done.  */
2765      size_t remainder;
2766
2767      /* Number of easily-available slots at the next loop iteration.  */
2768      size_t cheap_slots;
2769
2770      /* Do as many NMERGE-size merges as possible. In the case that
2771         nmerge is bogus, increment by the maximum number of file
2772         descriptors allowed.  */
2773      for (out = in = 0; nmerge <= nfiles - in; out++)
2774        {
2775          FILE *tfp;
2776          pid_t pid;
2777          char *temp = create_temp (&tfp, &pid);
2778          size_t num_merged = mergefiles (&files[in], MIN (ntemps, nmerge),
2779                                          nmerge, tfp, temp);
2780          ntemps -= MIN (ntemps, num_merged);
2781          files[out].name = temp;
2782          files[out].pid = pid;
2783          in += num_merged;
2784        }
2785
2786      remainder = nfiles - in;
2787      cheap_slots = nmerge - out % nmerge;
2788
2789      if (cheap_slots < remainder)
2790        {
2791          /* So many files remain that they can't all be put into the last
2792             NMERGE-sized output window.  Do one more merge.  Merge as few
2793             files as possible, to avoid needless I/O.  */
2794          size_t nshortmerge = remainder - cheap_slots + 1;
2795          FILE *tfp;
2796          pid_t pid;
2797          char *temp = create_temp (&tfp, &pid);
2798          size_t num_merged = mergefiles (&files[in], MIN (ntemps, nshortmerge),
2799                                          nshortmerge, tfp, temp);
2800          ntemps -= MIN (ntemps, num_merged);
2801          files[out].name = temp;
2802          files[out++].pid = pid;
2803          in += num_merged;
2804        }
2805
2806      /* Put the remaining input files into the last NMERGE-sized output
2807         window, so they will be merged in the next pass.  */
2808      memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
2809      ntemps += out;
2810      nfiles -= in - out;
2811    }
2812
2813  nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
2814
2815  /* We aren't guaranteed that this final mergefiles will work, therefore we
2816     try to merge into the output, and then merge as much as we can into a
2817     temp file if we can't. Repeat.  */
2818
2819  for (;;)
2820    {
2821      /* Merge directly into the output file if possible.  */
2822      FILE **fps;
2823      size_t nopened = open_input_files (files, nfiles, &fps);
2824
2825      if (nopened == nfiles)
2826        {
2827          FILE *ofp = stream_open (output_file, "w");
2828          if (ofp)
2829            {
2830              mergefps (files, ntemps, nfiles, ofp, output_file, fps);
2831              break;
2832            }
2833          if (errno != EMFILE || nopened <= 2)
2834            die (_("open failed"), output_file);
2835        }
2836      else if (nopened <= 2)
2837        die (_("open failed"), files[nopened].name);
2838
2839      /* We ran out of file descriptors.  Close one of the input
2840         files, to gain a file descriptor.  Then create a temporary
2841         file with our spare file descriptor.  Retry if that failed
2842         (e.g., some other process could open a file between the time
2843         we closed and tried to create).  */
2844      {
2845      FILE *tfp;
2846      pid_t pid;
2847      char *temp;
2848      do
2849        {
2850          nopened--;
2851          xfclose (fps[nopened], files[nopened].name);
2852          temp = maybe_create_temp (&tfp, &pid, ! (nopened <= 2));
2853        }
2854      while (!temp);
2855
2856      /* Merge into the newly allocated temporary.  */
2857      mergefps (&files[0], MIN (ntemps, nopened), nopened, tfp, temp, fps);
2858      ntemps -= MIN (ntemps, nopened);
2859      files[0].name = temp;
2860      files[0].pid = pid;
2861
2862      memmove (&files[1], &files[nopened], (nfiles - nopened) * sizeof *files);
2863      ntemps++;
2864      nfiles -= nopened - 1;
2865      }
2866    }
2867}
2868
2869/* Sort NFILES FILES onto OUTPUT_FILE. */
2870
2871static void
2872sort (char * const *files, size_t nfiles, char const *output_file)
2873{
2874  struct buffer buf;
2875  size_t ntemps = 0;
2876  bool output_file_created = false;
2877
2878  buf.alloc = 0;
2879
2880  while (nfiles)
2881    {
2882      char const *temp_output;
2883      char const *file = *files;
2884      FILE *fp = xfopen (file, "r");
2885      FILE *tfp;
2886      size_t bytes_per_line = (2 * sizeof (struct line)
2887                               - sizeof (struct line) / 2);
2888
2889      if (! buf.alloc)
2890        initbuf (&buf, bytes_per_line,
2891                 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2892      buf.eof = false;
2893      files++;
2894      nfiles--;
2895
2896      while (fillbuf (&buf, fp, file))
2897        {
2898          struct line *line;
2899          struct line *linebase;
2900
2901          if (buf.eof && nfiles
2902              && (bytes_per_line + 1
2903                  < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2904            {
2905              /* End of file, but there is more input and buffer room.
2906                 Concatenate the next input file; this is faster in
2907                 the usual case.  */
2908              buf.left = buf.used;
2909              break;
2910            }
2911
2912          line = buffer_linelim (&buf);
2913          linebase = line - buf.nlines;
2914          if (1 < buf.nlines)
2915            sortlines (line, buf.nlines, linebase);
2916          if (buf.eof && !nfiles && !ntemps && !buf.left)
2917            {
2918              xfclose (fp, file);
2919              tfp = xfopen (output_file, "w");
2920              temp_output = output_file;
2921              output_file_created = true;
2922            }
2923          else
2924            {
2925              ++ntemps;
2926              temp_output = create_temp (&tfp, NULL);
2927            }
2928
2929          do
2930            {
2931              line--;
2932              write_bytes (line->text, line->length, tfp, temp_output);
2933              if (unique)
2934                while (linebase < line && compare (line, line - 1) == 0)
2935                  line--;
2936            }
2937          while (linebase < line);
2938
2939          xfclose (tfp, temp_output);
2940
2941          /* Free up some resources every once in a while.  */
2942          if (MAX_PROCS_BEFORE_REAP < nprocs)
2943            reap_some ();
2944
2945          if (output_file_created)
2946            goto finish;
2947        }
2948      xfclose (fp, file);
2949    }
2950
2951 finish:
2952  free (buf.buf);
2953
2954  if (! output_file_created)
2955    {
2956      size_t i;
2957      struct tempnode *node = temphead;
2958      struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
2959      for (i = 0; node; i++)
2960        {
2961          tempfiles[i].name = node->name;
2962          tempfiles[i].pid = node->pid;
2963          node = node->next;
2964        }
2965      merge (tempfiles, ntemps, ntemps, output_file);
2966      free (tempfiles);
2967    }
2968}
2969
2970/* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
2971
2972static void
2973insertkey (struct keyfield *key_arg)
2974{
2975  struct keyfield **p;
2976  struct keyfield *key = xmemdup (key_arg, sizeof *key);
2977
2978  for (p = &keylist; *p; p = &(*p)->next)
2979    continue;
2980  *p = key;
2981  key->next = NULL;
2982}
2983
2984/* Report a bad field specification SPEC, with extra info MSGID.  */
2985
2986static void badfieldspec (char const *, char const *)
2987     ATTRIBUTE_NORETURN;
2988static void
2989badfieldspec (char const *spec, char const *msgid)
2990{
2991  error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
2992         _(msgid), quote (spec));
2993  abort ();
2994}
2995
2996/* Report incompatible options.  */
2997
2998static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
2999static void
3000incompatible_options (char const *opts)
3001{
3002  error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
3003  abort ();
3004}
3005
3006/* Check compatibility of ordering options.  */
3007
3008static void
3009check_ordering_compatibility (void)
3010{
3011  struct keyfield const *key;
3012
3013  for (key = keylist; key; key = key->next)
3014    if ((1 < (key->random + key->numeric + key->general_numeric + key->month
3015              + key->version + !!key->ignore + key->human_numeric))
3016        || (key->random && key->translate))
3017      {
3018        /* The following is too big, but guaranteed to be "big enough". */
3019        char opts[sizeof short_options];
3020        char *p = opts;
3021        if (key->ignore == nondictionary)
3022          *p++ = 'd';
3023        if (key->translate)
3024          *p++ = 'f';
3025        if (key->general_numeric)
3026          *p++ = 'g';
3027        if (key->human_numeric)
3028          *p++ = 'h';
3029        if (key->ignore == nonprinting)
3030          *p++ = 'i';
3031        if (key->month)
3032          *p++ = 'M';
3033        if (key->numeric)
3034          *p++ = 'n';
3035        if (key->version)
3036          *p++ = 'V';
3037        if (key->random)
3038          *p++ = 'R';
3039        *p = '\0';
3040        incompatible_options (opts);
3041      }
3042}
3043
3044/* Parse the leading integer in STRING and store the resulting value
3045   (which must fit into size_t) into *VAL.  Return the address of the
3046   suffix after the integer.  If the value is too large, silently
3047   substitute SIZE_MAX.  If MSGID is NULL, return NULL after
3048   failure; otherwise, report MSGID and exit on failure.  */
3049
3050static char const *
3051parse_field_count (char const *string, size_t *val, char const *msgid)
3052{
3053  char *suffix;
3054  uintmax_t n;
3055
3056  switch (xstrtoumax (string, &suffix, 10, &n, ""))
3057    {
3058    case LONGINT_OK:
3059    case LONGINT_INVALID_SUFFIX_CHAR:
3060      *val = n;
3061      if (*val == n)
3062        break;
3063      /* Fall through.  */
3064    case LONGINT_OVERFLOW:
3065    case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
3066      *val = SIZE_MAX;
3067      break;
3068
3069    case LONGINT_INVALID:
3070      if (msgid)
3071        error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
3072               _(msgid), quote (string));
3073      return NULL;
3074    }
3075
3076  return suffix;
3077}
3078
3079/* Handle interrupts and hangups. */
3080
3081static void
3082sighandler (int sig)
3083{
3084  if (! SA_NOCLDSTOP)
3085    signal (sig, SIG_IGN);
3086
3087  cleanup ();
3088
3089  signal (sig, SIG_DFL);
3090  raise (sig);
3091}
3092
3093/* Set the ordering options for KEY specified in S.
3094   Return the address of the first character in S that
3095   is not a valid ordering option.
3096   BLANKTYPE is the kind of blanks that 'b' should skip. */
3097
3098static char *
3099set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
3100{
3101  while (*s)
3102    {
3103      switch (*s)
3104        {
3105        case 'b':
3106          if (blanktype == bl_start || blanktype == bl_both)
3107            key->skipsblanks = true;
3108          if (blanktype == bl_end || blanktype == bl_both)
3109            key->skipeblanks = true;
3110          break;
3111        case 'd':
3112          key->ignore = nondictionary;
3113          break;
3114        case 'f':
3115          key->translate = fold_toupper;
3116          break;
3117        case 'g':
3118          key->general_numeric = true;
3119          break;
3120        case 'h':
3121          key->human_numeric = true;
3122          break;
3123        case 'i':
3124          /* Option order should not matter, so don't let -i override
3125             -d.  -d implies -i, but -i does not imply -d.  */
3126          if (! key->ignore)
3127            key->ignore = nonprinting;
3128          break;
3129        case 'M':
3130          key->month = true;
3131          break;
3132        case 'n':
3133          key->numeric = true;
3134          break;
3135        case 'R':
3136          key->random = true;
3137          break;
3138        case 'r':
3139          key->reverse = true;
3140          break;
3141        case 'V':
3142          key->version = true;
3143          break;
3144        default:
3145          return (char *) s;
3146        }
3147      ++s;
3148    }
3149  return (char *) s;
3150}
3151
3152static struct keyfield *
3153key_init (struct keyfield *key)
3154{
3155  memset (key, 0, sizeof *key);
3156  key->eword = SIZE_MAX;
3157  key->si_present = -1;
3158  return key;
3159}
3160
3161int
3162main (int argc, char **argv)
3163{
3164  struct keyfield *key;
3165  struct keyfield key_buf;
3166  struct keyfield gkey;
3167  char const *s;
3168  int c = 0;
3169  char checkonly = 0;
3170  bool mergeonly = false;
3171  char *random_source = NULL;
3172  bool need_random = false;
3173  size_t nfiles = 0;
3174  bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
3175  bool obsolete_usage = (posix2_version () < 200112);
3176  char **files;
3177  char *files_from = NULL;
3178  struct Tokens tok;
3179  char const *outfile = NULL;
3180
3181  initialize_main (&argc, &argv);
3182  set_program_name (argv[0]);
3183  setlocale (LC_ALL, "");
3184  bindtextdomain (PACKAGE, LOCALEDIR);
3185  textdomain (PACKAGE);
3186
3187  initialize_exit_failure (SORT_FAILURE);
3188
3189  hard_LC_COLLATE = hard_locale (LC_COLLATE);
3190#if HAVE_NL_LANGINFO
3191  hard_LC_TIME = hard_locale (LC_TIME);
3192#endif
3193
3194  /* Get locale's representation of the decimal point.  */
3195  {
3196    struct lconv const *locale = localeconv ();
3197
3198    /* If the locale doesn't define a decimal point, or if the decimal
3199       point is multibyte, use the C locale's decimal point.  FIXME:
3200       add support for multibyte decimal points.  */
3201    decimal_point = to_uchar (locale->decimal_point[0]);
3202    if (! decimal_point || locale->decimal_point[1])
3203      decimal_point = '.';
3204
3205    /* FIXME: add support for multibyte thousands separators.  */
3206    thousands_sep = to_uchar (*locale->thousands_sep);
3207    if (! thousands_sep || locale->thousands_sep[1])
3208      thousands_sep = -1;
3209  }
3210
3211  have_read_stdin = false;
3212  inittables ();
3213
3214  {
3215    size_t i;
3216    static int const sig[] =
3217      {
3218        /* The usual suspects.  */
3219        SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
3220#ifdef SIGPOLL
3221        SIGPOLL,
3222#endif
3223#ifdef SIGPROF
3224        SIGPROF,
3225#endif
3226#ifdef SIGVTALRM
3227        SIGVTALRM,
3228#endif
3229#ifdef SIGXCPU
3230        SIGXCPU,
3231#endif
3232#ifdef SIGXFSZ
3233        SIGXFSZ,
3234#endif
3235      };
3236    enum { nsigs = ARRAY_CARDINALITY (sig) };
3237
3238#if SA_NOCLDSTOP
3239    struct sigaction act;
3240
3241    sigemptyset (&caught_signals);
3242    for (i = 0; i < nsigs; i++)
3243      {
3244        sigaction (sig[i], NULL, &act);
3245        if (act.sa_handler != SIG_IGN)
3246          sigaddset (&caught_signals, sig[i]);
3247      }
3248
3249    act.sa_handler = sighandler;
3250    act.sa_mask = caught_signals;
3251    act.sa_flags = 0;
3252
3253    for (i = 0; i < nsigs; i++)
3254      if (sigismember (&caught_signals, sig[i]))
3255        sigaction (sig[i], &act, NULL);
3256#else
3257    for (i = 0; i < nsigs; i++)
3258      if (signal (sig[i], SIG_IGN) != SIG_IGN)
3259        {
3260          signal (sig[i], sighandler);
3261          siginterrupt (sig[i], 1);
3262        }
3263#endif
3264  }
3265  signal (SIGCHLD, SIG_DFL); /* Don't inherit CHLD handling from parent.  */
3266
3267  /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
3268  atexit (exit_cleanup);
3269
3270  gkey.sword = gkey.eword = SIZE_MAX;
3271  gkey.ignore = NULL;
3272  gkey.translate = NULL;
3273  gkey.numeric = gkey.general_numeric = gkey.human_numeric = false;
3274  gkey.si_present = -1;
3275  gkey.random = gkey.version = false;
3276  gkey.month = gkey.reverse = false;
3277  gkey.skipsblanks = gkey.skipeblanks = false;
3278
3279  files = xnmalloc (argc, sizeof *files);
3280
3281  for (;;)
3282    {
3283      /* Parse an operand as a file after "--" was seen; or if
3284         pedantic and a file was seen, unless the POSIX version
3285         predates 1003.1-2001 and -c was not seen and the operand is
3286         "-o FILE" or "-oFILE".  */
3287      int oi = -1;
3288
3289      if (c == -1
3290          || (posixly_correct && nfiles != 0
3291              && ! (obsolete_usage
3292                    && ! checkonly
3293                    && optind != argc
3294                    && argv[optind][0] == '-' && argv[optind][1] == 'o'
3295                    && (argv[optind][2] || optind + 1 != argc)))
3296          || ((c = getopt_long (argc, argv, short_options,
3297                                long_options, &oi))
3298              == -1))
3299        {
3300          if (argc <= optind)
3301            break;
3302          files[nfiles++] = argv[optind++];
3303        }
3304      else switch (c)
3305        {
3306        case 1:
3307          key = NULL;
3308          if (optarg[0] == '+')
3309            {
3310              bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
3311                                      && ISDIGIT (argv[optind][1]));
3312              obsolete_usage |= minus_pos_usage && !posixly_correct;
3313              if (obsolete_usage)
3314                {
3315                  /* Treat +POS1 [-POS2] as a key if possible; but silently
3316                     treat an operand as a file if it is not a valid +POS1.  */
3317                  key = key_init (&key_buf);
3318                  s = parse_field_count (optarg + 1, &key->sword, NULL);
3319                  if (s && *s == '.')
3320                    s = parse_field_count (s + 1, &key->schar, NULL);
3321                  if (! (key->sword || key->schar))
3322                    key->sword = SIZE_MAX;
3323                  if (! s || *set_ordering (s, key, bl_start))
3324                    key = NULL;
3325                  else
3326                    {
3327                      if (minus_pos_usage)
3328                        {
3329                          char const *optarg1 = argv[optind++];
3330                          s = parse_field_count (optarg1 + 1, &key->eword,
3331                                             N_("invalid number after `-'"));
3332                          if (*s == '.')
3333                            s = parse_field_count (s + 1, &key->echar,
3334                                               N_("invalid number after `.'"));
3335                          if (*set_ordering (s, key, bl_end))
3336                            badfieldspec (optarg1,
3337                                      N_("stray character in field spec"));
3338                        }
3339                      insertkey (key);
3340                    }
3341                }
3342            }
3343          if (! key)
3344            files[nfiles++] = optarg;
3345          break;
3346
3347        case SORT_OPTION:
3348          c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
3349          /* Fall through. */
3350        case 'b':
3351        case 'd':
3352        case 'f':
3353        case 'g':
3354        case 'h':
3355        case 'i':
3356        case 'M':
3357        case 'n':
3358        case 'r':
3359        case 'R':
3360        case 'V':
3361          {
3362            char str[2];
3363            str[0] = c;
3364            str[1] = '\0';
3365            set_ordering (str, &gkey, bl_both);
3366          }
3367          break;
3368
3369        case CHECK_OPTION:
3370          c = (optarg
3371               ? XARGMATCH ("--check", optarg, check_args, check_types)
3372               : 'c');
3373          /* Fall through.  */
3374        case 'c':
3375        case 'C':
3376          if (checkonly && checkonly != c)
3377            incompatible_options ("cC");
3378          checkonly = c;
3379          break;
3380
3381        case COMPRESS_PROGRAM_OPTION:
3382          if (compress_program && !STREQ (compress_program, optarg))
3383            error (SORT_FAILURE, 0, _("multiple compress programs specified"));
3384          compress_program = optarg;
3385          break;
3386
3387        case FILES0_FROM_OPTION:
3388          files_from = optarg;
3389          break;
3390
3391        case 'k':
3392          key = key_init (&key_buf);
3393
3394          /* Get POS1. */
3395          s = parse_field_count (optarg, &key->sword,
3396                                 N_("invalid number at field start"));
3397          if (! key->sword--)
3398            {
3399              /* Provoke with `sort -k0' */
3400              badfieldspec (optarg, N_("field number is zero"));
3401            }
3402          if (*s == '.')
3403            {
3404              s = parse_field_count (s + 1, &key->schar,
3405                                     N_("invalid number after `.'"));
3406              if (! key->schar--)
3407                {
3408                  /* Provoke with `sort -k1.0' */
3409                  badfieldspec (optarg, N_("character offset is zero"));
3410                }
3411            }
3412          if (! (key->sword || key->schar))
3413            key->sword = SIZE_MAX;
3414          s = set_ordering (s, key, bl_start);
3415          if (*s != ',')
3416            {
3417              key->eword = SIZE_MAX;
3418              key->echar = 0;
3419            }
3420          else
3421            {
3422              /* Get POS2. */
3423              s = parse_field_count (s + 1, &key->eword,
3424                                     N_("invalid number after `,'"));
3425              if (! key->eword--)
3426                {
3427                  /* Provoke with `sort -k1,0' */
3428                  badfieldspec (optarg, N_("field number is zero"));
3429                }
3430              if (*s == '.')
3431                {
3432                  s = parse_field_count (s + 1, &key->echar,
3433                                         N_("invalid number after `.'"));
3434                }
3435              s = set_ordering (s, key, bl_end);
3436            }
3437          if (*s)
3438            badfieldspec (optarg, N_("stray character in field spec"));
3439          insertkey (key);
3440          break;
3441
3442        case 'm':
3443          mergeonly = true;
3444          break;
3445
3446        case NMERGE_OPTION:
3447          specify_nmerge (oi, c, optarg);
3448          break;
3449
3450        case 'o':
3451          if (outfile && !STREQ (outfile, optarg))
3452            error (SORT_FAILURE, 0, _("multiple output files specified"));
3453          outfile = optarg;
3454          break;
3455
3456        case RANDOM_SOURCE_OPTION:
3457          if (random_source && !STREQ (random_source, optarg))
3458            error (SORT_FAILURE, 0, _("multiple random sources specified"));
3459          random_source = optarg;
3460          break;
3461
3462        case 's':
3463          stable = true;
3464          break;
3465
3466        case 'S':
3467          specify_sort_size (oi, c, optarg);
3468          break;
3469
3470        case 't':
3471          {
3472            char newtab = optarg[0];
3473            if (! newtab)
3474              error (SORT_FAILURE, 0, _("empty tab"));
3475            if (optarg[1])
3476              {
3477                if (STREQ (optarg, "\\0"))
3478                  newtab = '\0';
3479                else
3480                  {
3481                    /* Provoke with `sort -txx'.  Complain about
3482                       "multi-character tab" instead of "multibyte tab", so
3483                       that the diagnostic's wording does not need to be
3484                       changed once multibyte characters are supported.  */
3485                    error (SORT_FAILURE, 0, _("multi-character tab %s"),
3486                           quote (optarg));
3487                  }
3488              }
3489            if (tab != TAB_DEFAULT && tab != newtab)
3490              error (SORT_FAILURE, 0, _("incompatible tabs"));
3491            tab = newtab;
3492          }
3493          break;
3494
3495        case 'T':
3496          add_temp_dir (optarg);
3497          break;
3498
3499        case 'u':
3500          unique = true;
3501          break;
3502
3503        case 'y':
3504          /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
3505             through Solaris 7.  It is also accepted by many non-Solaris
3506             "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
3507             -y is marked as obsolete starting with Solaris 8 (1999), but is
3508             still accepted as of Solaris 10 prerelease (2004).
3509
3510             Solaris 2.5.1 "sort -y 100" reads the input file "100", but
3511             emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
3512             and which in general ignores the argument after "-y" if it
3513             consists entirely of digits (it can even be empty).  */
3514          if (optarg == argv[optind - 1])
3515            {
3516              char const *p;
3517              for (p = optarg; ISDIGIT (*p); p++)
3518                continue;
3519              optind -= (*p != '\0');
3520            }
3521          break;
3522
3523        case 'z':
3524          eolchar = 0;
3525          break;
3526
3527        case_GETOPT_HELP_CHAR;
3528
3529        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3530
3531        default:
3532          usage (SORT_FAILURE);
3533        }
3534    }
3535
3536  if (files_from)
3537    {
3538      FILE *stream;
3539
3540      /* When using --files0-from=F, you may not specify any files
3541         on the command-line.  */
3542      if (nfiles)
3543        {
3544          error (0, 0, _("extra operand %s"), quote (files[0]));
3545          fprintf (stderr, "%s\n",
3546                   _("file operands cannot be combined with --files0-from"));
3547          usage (SORT_FAILURE);
3548        }
3549
3550      if (STREQ (files_from, "-"))
3551        stream = stdin;
3552      else
3553        {
3554          stream = fopen (files_from, "r");
3555          if (stream == NULL)
3556            error (SORT_FAILURE, errno, _("cannot open %s for reading"),
3557                   quote (files_from));
3558        }
3559
3560      readtokens0_init (&tok);
3561
3562      if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
3563        error (SORT_FAILURE, 0, _("cannot read file names from %s"),
3564               quote (files_from));
3565
3566      if (tok.n_tok)
3567        {
3568          size_t i;
3569          free (files);
3570          files = tok.tok;
3571          nfiles = tok.n_tok;
3572          for (i = 0; i < nfiles; i++)
3573          {
3574              if (STREQ (files[i], "-"))
3575                error (SORT_FAILURE, 0, _("when reading file names from stdin, "
3576                                          "no file name of %s allowed"),
3577                       quote (files[i]));
3578              else if (files[i][0] == '\0')
3579                {
3580                  /* Using the standard `filename:line-number:' prefix here is
3581                     not totally appropriate, since NUL is the separator, not NL,
3582                     but it might be better than nothing.  */
3583                  unsigned long int file_number = i + 1;
3584                  error (SORT_FAILURE, 0,
3585                         _("%s:%lu: invalid zero-length file name"),
3586                         quotearg_colon (files_from), file_number);
3587                }
3588          }
3589        }
3590      else
3591        error (SORT_FAILURE, 0, _("no input from %s"),
3592               quote (files_from));
3593    }
3594
3595  /* Inheritance of global options to individual keys. */
3596  for (key = keylist; key; key = key->next)
3597    {
3598      if (! (key->ignore
3599             || key->translate
3600             || (key->skipsblanks
3601                 || key->reverse
3602                 || key->skipeblanks
3603                 || key->month
3604                 || key->numeric
3605                 || key->version
3606                 || key->general_numeric
3607                 || key->human_numeric
3608                 || key->random)))
3609        {
3610          key->ignore = gkey.ignore;
3611          key->translate = gkey.translate;
3612          key->skipsblanks = gkey.skipsblanks;
3613          key->skipeblanks = gkey.skipeblanks;
3614          key->month = gkey.month;
3615          key->numeric = gkey.numeric;
3616          key->general_numeric = gkey.general_numeric;
3617          key->human_numeric = gkey.human_numeric;
3618          key->random = gkey.random;
3619          key->reverse = gkey.reverse;
3620          key->version = gkey.version;
3621        }
3622
3623      need_random |= key->random;
3624    }
3625
3626  if (!keylist && (gkey.ignore
3627                   || gkey.translate
3628                   || (gkey.skipsblanks
3629                       || gkey.skipeblanks
3630                       || gkey.month
3631                       || gkey.numeric
3632                       || gkey.general_numeric
3633                       || gkey.human_numeric
3634                       || gkey.random
3635                       || gkey.version)))
3636    {
3637      insertkey (&gkey);
3638      need_random |= gkey.random;
3639    }
3640
3641  check_ordering_compatibility ();
3642
3643  reverse = gkey.reverse;
3644
3645  if (need_random)
3646    {
3647      randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
3648      if (! randread_source)
3649        die (_("open failed"), random_source);
3650    }
3651
3652  if (temp_dir_count == 0)
3653    {
3654      char const *tmp_dir = getenv ("TMPDIR");
3655      add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3656    }
3657
3658  if (nfiles == 0)
3659    {
3660      static char *minus = (char *) "-";
3661      nfiles = 1;
3662      free (files);
3663      files = &minus;
3664    }
3665
3666  /* Need to re-check that we meet the minimum requirement for memory
3667     usage with the final value for NMERGE. */
3668  if (0 < sort_size)
3669    sort_size = MAX (sort_size, MIN_SORT_SIZE);
3670
3671  if (checkonly)
3672    {
3673      if (nfiles > 1)
3674        error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
3675               quote (files[1]), checkonly);
3676
3677      if (outfile)
3678        {
3679          static char opts[] = {0, 'o', 0};
3680          opts[0] = checkonly;
3681          incompatible_options (opts);
3682        }
3683
3684      /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
3685         input is not properly sorted.  */
3686      exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3687    }
3688
3689  if (mergeonly)
3690    {
3691      struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
3692      size_t i;
3693
3694      for (i = 0; i < nfiles; ++i)
3695        sortfiles[i].name = files[i];
3696
3697      merge (sortfiles, 0, nfiles, outfile);
3698      IF_LINT (free (sortfiles));
3699    }
3700  else
3701    sort (files, nfiles, outfile);
3702
3703  if (have_read_stdin && fclose (stdin) == EOF)
3704    die (_("close failed"), "-");
3705
3706  exit (EXIT_SUCCESS);
3707}
3708