1/* sort - sort lines of text (with all kinds of options).
2   Copyright (C) 1988, 1991-2005 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software Foundation,
16   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17
18   Written December 1988 by Mike Haertel.
19   The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20   or (US mail) as Mike Haertel c/o Free Software Foundation.
21
22   Ørn E. Hansen added NLS support in 1997.  */
23
24#include <config.h>
25
26#include <getopt.h>
27#include <sys/types.h>
28#include <signal.h>
29#include "system.h"
30#include "error.h"
31#include "hard-locale.h"
32#include "inttostr.h"
33#include "physmem.h"
34#include "posixver.h"
35#include "quote.h"
36#include "stdlib--.h"
37#include "stdio--.h"
38#include "strnumcmp.h"
39#include "xmemcoll.h"
40#include "xstrtol.h"
41
42#if HAVE_SYS_RESOURCE_H
43# include <sys/resource.h>
44#endif
45#ifndef RLIMIT_DATA
46struct rlimit { size_t rlim_cur; };
47# define getrlimit(Resource, Rlp) (-1)
48#endif
49
50/* The official name of this program (e.g., no `g' prefix).  */
51#define PROGRAM_NAME "sort"
52
53#define AUTHORS "Mike Haertel", "Paul Eggert"
54
55#if HAVE_LANGINFO_CODESET
56# include <langinfo.h>
57#endif
58
59/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
60   present.  */
61#ifndef SA_NOCLDSTOP
62# define SA_NOCLDSTOP 0
63# define sigprocmask(How, Set, Oset) /* empty */
64# define sigset_t int
65# if ! HAVE_SIGINTERRUPT
66#  define siginterrupt(sig, flag) /* empty */
67# endif
68#endif
69
70#ifndef STDC_HEADERS
71double strtod ();
72#endif
73
74#define UCHAR_LIM (UCHAR_MAX + 1)
75
76#ifndef DEFAULT_TMPDIR
77# define DEFAULT_TMPDIR "/tmp"
78#endif
79
80/* Exit statuses.  */
81enum
82  {
83    /* POSIX says to exit with status 1 if invoked with -c and the
84       input is not properly sorted.  */
85    SORT_OUT_OF_ORDER = 1,
86
87    /* POSIX says any other irregular exit must exit with a status
88       code greater than 1.  */
89    SORT_FAILURE = 2
90  };
91
92/* The representation of the decimal point in the current locale.  */
93static int decimal_point;
94
95/* Thousands separator; if -1, then there isn't one.  */
96static int thousands_sep;
97
98/* Nonzero if the corresponding locales are hard.  */
99static bool hard_LC_COLLATE;
100#if HAVE_NL_LANGINFO
101static bool hard_LC_TIME;
102#endif
103
104#define NONZERO(x) ((x) != 0)
105
106/* The kind of blanks for '-b' to skip in various options. */
107enum blanktype { bl_start, bl_end, bl_both };
108
109/* The character marking end of line. Default to \n. */
110static char eolchar = '\n';
111
112/* Lines are held in core as counted strings. */
113struct line
114{
115  char *text;			/* Text of the line. */
116  size_t length;		/* Length including final newline. */
117  char *keybeg;			/* Start of first key. */
118  char *keylim;			/* Limit of first key. */
119};
120
121/* Input buffers. */
122struct buffer
123{
124  char *buf;			/* Dynamically allocated buffer,
125				   partitioned into 3 regions:
126				   - input data;
127				   - unused area;
128				   - an array of lines, in reverse order.  */
129  size_t used;			/* Number of bytes used for input data.  */
130  size_t nlines;		/* Number of lines in the line array.  */
131  size_t alloc;			/* Number of bytes allocated. */
132  size_t left;			/* Number of bytes left from previous reads. */
133  size_t line_bytes;		/* Number of bytes to reserve for each line. */
134  bool eof;			/* An EOF has been read.  */
135};
136
137struct keyfield
138{
139  size_t sword;			/* Zero-origin 'word' to start at. */
140  size_t schar;			/* Additional characters to skip. */
141  size_t eword;			/* Zero-origin first word after field. */
142  size_t echar;			/* Additional characters in field. */
143  bool const *ignore;		/* Boolean array of characters to ignore. */
144  char const *translate;	/* Translation applied to characters. */
145  bool skipsblanks;		/* Skip leading blanks when finding start.  */
146  bool skipeblanks;		/* Skip leading blanks when finding end.  */
147  bool numeric;			/* Flag for numeric comparison.  Handle
148				   strings of digits with optional decimal
149				   point, but no exponential notation. */
150  bool general_numeric;		/* Flag for general, numeric comparison.
151				   Handle numbers in exponential notation. */
152  bool month;			/* Flag for comparison by month name. */
153  bool reverse;			/* Reverse the sense of comparison. */
154  struct keyfield *next;	/* Next keyfield to try. */
155};
156
157struct month
158{
159  char const *name;
160  int val;
161};
162
163/* The name this program was run with. */
164char *program_name;
165
166/* FIXME: None of these tables work with multibyte character sets.
167   Also, there are many other bugs when handling multibyte characters.
168   One way to fix this is to rewrite `sort' to use wide characters
169   internally, but doing this with good performance is a bit
170   tricky.  */
171
172/* Table of blanks.  */
173static bool blanks[UCHAR_LIM];
174
175/* Table of non-printing characters. */
176static bool nonprinting[UCHAR_LIM];
177
178/* Table of non-dictionary characters (not letters, digits, or blanks). */
179static bool nondictionary[UCHAR_LIM];
180
181/* Translation table folding lower case to upper.  */
182static char fold_toupper[UCHAR_LIM];
183
184#define MONTHS_PER_YEAR 12
185
186/* Table mapping month names to integers.
187   Alphabetic order allows binary search. */
188static struct month monthtab[] =
189{
190  {"APR", 4},
191  {"AUG", 8},
192  {"DEC", 12},
193  {"FEB", 2},
194  {"JAN", 1},
195  {"JUL", 7},
196  {"JUN", 6},
197  {"MAR", 3},
198  {"MAY", 5},
199  {"NOV", 11},
200  {"OCT", 10},
201  {"SEP", 9}
202};
203
204/* During the merge phase, the number of files to merge at once. */
205#define NMERGE 16
206
207/* Minimum size for a merge or check buffer.  */
208#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
209
210/* Minimum sort size; the code might not work with smaller sizes.  */
211#define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
212
213/* The number of bytes needed for a merge or check buffer, which can
214   function relatively efficiently even if it holds only one line.  If
215   a longer line is seen, this value is increased.  */
216static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
217
218/* The approximate maximum number of bytes of main memory to use, as
219   specified by the user.  Zero if the user has not specified a size.  */
220static size_t sort_size;
221
222/* The guessed size for non-regular files.  */
223#define INPUT_FILE_SIZE_GUESS (1024 * 1024)
224
225/* Array of directory names in which any temporary files are to be created. */
226static char const **temp_dirs;
227
228/* Number of temporary directory names used.  */
229static size_t temp_dir_count;
230
231/* Number of allocated slots in temp_dirs.  */
232static size_t temp_dir_alloc;
233
234/* Flag to reverse the order of all comparisons. */
235static bool reverse;
236
237/* Flag for stable sort.  This turns off the last ditch bytewise
238   comparison of lines, and instead leaves lines in the same order
239   they were read if all keys compare equal.  */
240static bool stable;
241
242/* If TAB has this value, blanks separate fields.  */
243enum { TAB_DEFAULT = CHAR_MAX + 1 };
244
245/* Tab character separating fields.  If TAB_DEFAULT, then fields are
246   separated by the empty string between a non-blank character and a blank
247   character. */
248static int tab = TAB_DEFAULT;
249
250/* Flag to remove consecutive duplicate lines from the output.
251   Only the last of a sequence of equal lines will be output. */
252static bool unique;
253
254/* Nonzero if any of the input files are the standard input. */
255static bool have_read_stdin;
256
257/* List of key field comparisons to be tried.  */
258static struct keyfield *keylist;
259
260static void sortlines_temp (struct line *, size_t, struct line *);
261
262/* Report MESSAGE for FILE, then clean up and exit.
263   If FILE is null, it represents standard output.  */
264
265static void die (char const *, char const *) ATTRIBUTE_NORETURN;
266static void
267die (char const *message, char const *file)
268{
269  error (0, errno, "%s: %s", message, file ? file : _("standard output"));
270  exit (SORT_FAILURE);
271}
272
273void
274usage (int status)
275{
276  if (status != EXIT_SUCCESS)
277    fprintf (stderr, _("Try `%s --help' for more information.\n"),
278	     program_name);
279  else
280    {
281      printf (_("\
282Usage: %s [OPTION]... [FILE]...\n\
283"),
284	      program_name);
285      fputs (_("\
286Write sorted concatenation of all FILE(s) to standard output.\n\
287\n\
288"), stdout);
289      fputs (_("\
290Mandatory arguments to long options are mandatory for short options too.\n\
291"), stdout);
292      fputs (_("\
293Ordering options:\n\
294\n\
295"), stdout);
296      fputs (_("\
297  -b, --ignore-leading-blanks  ignore leading blanks\n\
298  -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
299  -f, --ignore-case           fold lower case to upper case characters\n\
300"), stdout);
301      fputs (_("\
302  -g, --general-numeric-sort  compare according to general numerical value\n\
303  -i, --ignore-nonprinting    consider only printable characters\n\
304  -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
305  -n, --numeric-sort          compare according to string numerical value\n\
306  -r, --reverse               reverse the result of comparisons\n\
307\n\
308"), stdout);
309      fputs (_("\
310Other options:\n\
311\n\
312  -c, --check               check whether input is sorted; do not sort\n\
313  -k, --key=POS1[,POS2]     start a key at POS1, end it at POS2 (origin 1)\n\
314  -m, --merge               merge already sorted files; do not sort\n\
315  -o, --output=FILE         write result to FILE instead of standard output\n\
316  -s, --stable              stabilize sort by disabling last-resort comparison\n\
317  -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
318"), stdout);
319      printf (_("\
320  -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
321  -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
322                              multiple options specify multiple directories\n\
323  -u, --unique              with -c, check for strict ordering;\n\
324                              without -c, output only the first of an equal run\n\
325"), DEFAULT_TMPDIR);
326      fputs (_("\
327  -z, --zero-terminated     end lines with 0 byte, not newline\n\
328"), stdout);
329      fputs (HELP_OPTION_DESCRIPTION, stdout);
330      fputs (VERSION_OPTION_DESCRIPTION, stdout);
331      fputs (_("\
332\n\
333POS is F[.C][OPTS], where F is the field number and C the character position\n\
334in the field.  OPTS is one or more single-letter ordering options, which\n\
335override global ordering options for that key.  If no key is given, use the\n\
336entire line as the key.\n\
337\n\
338SIZE may be followed by the following multiplicative suffixes:\n\
339"), stdout);
340      fputs (_("\
341% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
342\n\
343With no FILE, or when FILE is -, read standard input.\n\
344\n\
345*** WARNING ***\n\
346The locale specified by the environment affects sort order.\n\
347Set LC_ALL=C to get the traditional sort order that uses\n\
348native byte values.\n\
349"), stdout );
350      printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
351    }
352
353  exit (status);
354}
355
356static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z";
357
358static struct option const long_options[] =
359{
360  {"ignore-leading-blanks", no_argument, NULL, 'b'},
361  {"check", no_argument, NULL, 'c'},
362  {"dictionary-order", no_argument, NULL, 'd'},
363  {"ignore-case", no_argument, NULL, 'f'},
364  {"general-numeric-sort", no_argument, NULL, 'g'},
365  {"ignore-nonprinting", no_argument, NULL, 'i'},
366  {"key", required_argument, NULL, 'k'},
367  {"merge", no_argument, NULL, 'm'},
368  {"month-sort", no_argument, NULL, 'M'},
369  {"numeric-sort", no_argument, NULL, 'n'},
370  {"output", required_argument, NULL, 'o'},
371  {"reverse", no_argument, NULL, 'r'},
372  {"stable", no_argument, NULL, 's'},
373  {"buffer-size", required_argument, NULL, 'S'},
374  {"field-separator", required_argument, NULL, 't'},
375  {"temporary-directory", required_argument, NULL, 'T'},
376  {"unique", no_argument, NULL, 'u'},
377  {"zero-terminated", no_argument, NULL, 'z'},
378  {GETOPT_HELP_OPTION_DECL},
379  {GETOPT_VERSION_OPTION_DECL},
380  {NULL, 0, NULL, 0},
381};
382
383/* The set of signals that are caught.  */
384static sigset_t caught_signals;
385
386/* The list of temporary files. */
387struct tempnode
388{
389  struct tempnode *volatile next;
390  char name[1];  /* Actual size is 1 + file name length.  */
391};
392static struct tempnode *volatile temphead;
393static struct tempnode *volatile *temptail = &temphead;
394
395/* Clean up any remaining temporary files.  */
396
397static void
398cleanup (void)
399{
400  struct tempnode const *node;
401
402  for (node = temphead; node; node = node->next)
403    unlink (node->name);
404}
405
406/* Create a new temporary file, returning its newly allocated name.
407   Store into *PFP a stream open for writing.  */
408
409static char *
410create_temp_file (FILE **pfp)
411{
412  static char const slashbase[] = "/sortXXXXXX";
413  static size_t temp_dir_index;
414  sigset_t oldset;
415  int fd;
416  int saved_errno;
417  char const *temp_dir = temp_dirs[temp_dir_index];
418  size_t len = strlen (temp_dir);
419  struct tempnode *node =
420    xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
421  char *file = node->name;
422
423  memcpy (file, temp_dir, len);
424  memcpy (file + len, slashbase, sizeof slashbase);
425  node->next = NULL;
426  if (++temp_dir_index == temp_dir_count)
427    temp_dir_index = 0;
428
429  /* Create the temporary file in a critical section, to avoid races.  */
430  sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
431  fd = mkstemp (file);
432  if (0 <= fd)
433    {
434      *temptail = node;
435      temptail = &node->next;
436    }
437  saved_errno = errno;
438  sigprocmask (SIG_SETMASK, &oldset, NULL);
439  errno = saved_errno;
440
441  if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
442    die (_("cannot create temporary file"), file);
443
444  return file;
445}
446
447/* Return a stream for FILE, opened with mode HOW.  A null FILE means
448   standard output; HOW should be "w".  When opening for input, "-"
449   means standard input.  To avoid confusion, do not return file
450   descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
451   opening an ordinary FILE.  */
452
453static FILE *
454xfopen (const char *file, const char *how)
455{
456  FILE *fp;
457
458  if (!file)
459    fp = stdout;
460  else if (STREQ (file, "-") && *how == 'r')
461    {
462      have_read_stdin = true;
463      fp = stdin;
464    }
465  else
466    {
467      fp = fopen (file, how);
468      if (! fp)
469	die (_("open failed"), file);
470    }
471
472  return fp;
473}
474
475/* Close FP, whose name is FILE, and report any errors.  */
476
477static void
478xfclose (FILE *fp, char const *file)
479{
480  switch (fileno (fp))
481    {
482    case STDIN_FILENO:
483      /* Allow reading stdin from tty more than once.  */
484      if (feof (fp))
485	clearerr (fp);
486      break;
487
488    case STDOUT_FILENO:
489      /* Don't close stdout just yet.  close_stdout does that.  */
490      if (fflush (fp) != 0)
491	die (_("fflush failed"), file);
492      break;
493
494    default:
495      if (fclose (fp) != 0)
496	die (_("close failed"), file);
497      break;
498    }
499}
500
501static void
502write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
503{
504  if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
505    die (_("write failed"), output_file);
506}
507
508/* Append DIR to the array of temporary directory names.  */
509static void
510add_temp_dir (char const *dir)
511{
512  if (temp_dir_count == temp_dir_alloc)
513    temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
514
515  temp_dirs[temp_dir_count++] = dir;
516}
517
518/* Remove NAME from the list of temporary files.  */
519
520static void
521zaptemp (const char *name)
522{
523  struct tempnode *volatile *pnode;
524  struct tempnode *node;
525  struct tempnode *next;
526  sigset_t oldset;
527  int unlink_status;
528  int unlink_errno = 0;
529
530  for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
531    continue;
532
533  /* Unlink the temporary file in a critical section to avoid races.  */
534  next = node->next;
535  sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
536  unlink_status = unlink (name);
537  unlink_errno = errno;
538  *pnode = next;
539  sigprocmask (SIG_SETMASK, &oldset, NULL);
540
541  if (unlink_status != 0)
542    error (0, unlink_errno, _("warning: cannot remove: %s"), name);
543  if (! next)
544    temptail = pnode;
545  free (node);
546}
547
548#if HAVE_NL_LANGINFO
549
550static int
551struct_month_cmp (const void *m1, const void *m2)
552{
553  struct month const *month1 = m1;
554  struct month const *month2 = m2;
555  return strcmp (month1->name, month2->name);
556}
557
558#endif
559
560/* Initialize the character class tables. */
561
562static void
563inittables (void)
564{
565  size_t i;
566
567  for (i = 0; i < UCHAR_LIM; ++i)
568    {
569      blanks[i] = !!ISBLANK (i);
570      nonprinting[i] = !ISPRINT (i);
571      nondictionary[i] = !ISALNUM (i) && !ISBLANK (i);
572      fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
573    }
574
575#if HAVE_NL_LANGINFO
576  /* If we're not in the "C" locale, read different names for months.  */
577  if (hard_LC_TIME)
578    {
579      for (i = 0; i < MONTHS_PER_YEAR; i++)
580	{
581	  char const *s;
582	  size_t s_len;
583	  size_t j;
584	  char *name;
585
586	  s = (char *) nl_langinfo (ABMON_1 + i);
587	  s_len = strlen (s);
588	  monthtab[i].name = name = xmalloc (s_len + 1);
589	  monthtab[i].val = i + 1;
590
591	  for (j = 0; j < s_len; j++)
592	    name[j] = fold_toupper[to_uchar (s[j])];
593	  name[j] = '\0';
594	}
595      qsort ((void *) monthtab, MONTHS_PER_YEAR,
596	     sizeof *monthtab, struct_month_cmp);
597    }
598#endif
599}
600
601/* Specify the amount of main memory to use when sorting.  */
602static void
603specify_sort_size (char const *s)
604{
605  uintmax_t n;
606  char *suffix;
607  enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
608
609  /* The default unit is KiB.  */
610  if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
611    {
612      if (n <= UINTMAX_MAX / 1024)
613	n *= 1024;
614      else
615	e = LONGINT_OVERFLOW;
616    }
617
618  /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
619  if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
620    switch (suffix[0])
621      {
622      case 'b':
623	e = LONGINT_OK;
624	break;
625
626      case '%':
627	{
628	  double mem = physmem_total () * n / 100;
629
630	  /* Use "<", not "<=", to avoid problems with rounding.  */
631	  if (mem < UINTMAX_MAX)
632	    {
633	      n = mem;
634	      e = LONGINT_OK;
635	    }
636	  else
637	    e = LONGINT_OVERFLOW;
638	}
639	break;
640      }
641
642  if (e == LONGINT_OK)
643    {
644      /* If multiple sort sizes are specified, take the maximum, so
645	 that option order does not matter.  */
646      if (n < sort_size)
647	return;
648
649      sort_size = n;
650      if (sort_size == n)
651	{
652	  sort_size = MAX (sort_size, MIN_SORT_SIZE);
653	  return;
654	}
655
656      e = LONGINT_OVERFLOW;
657    }
658
659  STRTOL_FATAL_ERROR (s, _("sort size"), e);
660}
661
662/* Return the default sort size.  */
663static size_t
664default_sort_size (void)
665{
666  /* Let MEM be available memory or 1/8 of total memory, whichever
667     is greater.  */
668  double avail = physmem_available ();
669  double total = physmem_total ();
670  double mem = MAX (avail, total / 8);
671  struct rlimit rlimit;
672
673  /* Let SIZE be MEM, but no more than the maximum object size or
674     system resource limits.  Avoid the MIN macro here, as it is not
675     quite right when only one argument is floating point.  Don't
676     bother to check for values like RLIM_INFINITY since in practice
677     they are not much less than SIZE_MAX.  */
678  size_t size = SIZE_MAX;
679  if (mem < size)
680    size = mem;
681  if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
682    size = rlimit.rlim_cur;
683#ifdef RLIMIT_AS
684  if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
685    size = rlimit.rlim_cur;
686#endif
687
688  /* Leave a large safety margin for the above limits, as failure can
689     occur when they are exceeded.  */
690  size /= 2;
691
692#ifdef RLIMIT_RSS
693  /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
694     Exceeding RSS is not fatal, but can be quite slow.  */
695  if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
696    size = rlimit.rlim_cur / 16 * 15;
697#endif
698
699  /* Use no less than the minimum.  */
700  return MAX (size, MIN_SORT_SIZE);
701}
702
703/* Return the sort buffer size to use with the input files identified
704   by FPS and FILES, which are alternate names of the same files.
705   NFILES gives the number of input files; NFPS may be less.  Assume
706   that each input line requires LINE_BYTES extra bytes' worth of line
707   information.  Do not exceed the size bound specified by the user
708   (or a default size bound, if the user does not specify one).  */
709
710static size_t
711sort_buffer_size (FILE *const *fps, size_t nfps,
712		  char *const *files, size_t nfiles,
713		  size_t line_bytes)
714{
715  /* A bound on the input size.  If zero, the bound hasn't been
716     determined yet.  */
717  static size_t size_bound;
718
719  /* In the worst case, each input byte is a newline.  */
720  size_t worst_case_per_input_byte = line_bytes + 1;
721
722  /* Keep enough room for one extra input line and an extra byte.
723     This extra room might be needed when preparing to read EOF.  */
724  size_t size = worst_case_per_input_byte + 1;
725
726  size_t i;
727
728  for (i = 0; i < nfiles; i++)
729    {
730      struct stat st;
731      off_t file_size;
732      size_t worst_case;
733
734      if ((i < nfps ? fstat (fileno (fps[i]), &st)
735	   : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
736	   : stat (files[i], &st))
737	  != 0)
738	die (_("stat failed"), files[i]);
739
740      if (S_ISREG (st.st_mode))
741	file_size = st.st_size;
742      else
743	{
744	  /* The file has unknown size.  If the user specified a sort
745	     buffer size, use that; otherwise, guess the size.  */
746	  if (sort_size)
747	    return sort_size;
748	  file_size = INPUT_FILE_SIZE_GUESS;
749	}
750
751      if (! size_bound)
752	{
753	  size_bound = sort_size;
754	  if (! size_bound)
755	    size_bound = default_sort_size ();
756	}
757
758      /* Add the amount of memory needed to represent the worst case
759	 where the input consists entirely of newlines followed by a
760	 single non-newline.  Check for overflow.  */
761      worst_case = file_size * worst_case_per_input_byte + 1;
762      if (file_size != worst_case / worst_case_per_input_byte
763	  || size_bound - size <= worst_case)
764	return size_bound;
765      size += worst_case;
766    }
767
768  return size;
769}
770
771/* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
772   must be at least sizeof (struct line).  Allocate ALLOC bytes
773   initially.  */
774
775static void
776initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
777{
778  /* Ensure that the line array is properly aligned.  If the desired
779     size cannot be allocated, repeatedly halve it until allocation
780     succeeds.  The smaller allocation may hurt overall performance,
781     but that's better than failing.  */
782  for (;;)
783    {
784      alloc += sizeof (struct line) - alloc % sizeof (struct line);
785      buf->buf = malloc (alloc);
786      if (buf->buf)
787	break;
788      alloc /= 2;
789      if (alloc <= line_bytes + 1)
790	xalloc_die ();
791    }
792
793  buf->line_bytes = line_bytes;
794  buf->alloc = alloc;
795  buf->used = buf->left = buf->nlines = 0;
796  buf->eof = false;
797}
798
799/* Return one past the limit of the line array.  */
800
801static inline struct line *
802buffer_linelim (struct buffer const *buf)
803{
804  return (struct line *) (buf->buf + buf->alloc);
805}
806
807/* Return a pointer to the first character of the field specified
808   by KEY in LINE. */
809
810static char *
811begfield (const struct line *line, const struct keyfield *key)
812{
813  char *ptr = line->text, *lim = ptr + line->length - 1;
814  size_t sword = key->sword;
815  size_t schar = key->schar;
816  size_t remaining_bytes;
817
818  /* The leading field separator itself is included in a field when -t
819     is absent.  */
820
821  if (tab != TAB_DEFAULT)
822    while (ptr < lim && sword--)
823      {
824	while (ptr < lim && *ptr != tab)
825	  ++ptr;
826	if (ptr < lim)
827	  ++ptr;
828      }
829  else
830    while (ptr < lim && sword--)
831      {
832	while (ptr < lim && blanks[to_uchar (*ptr)])
833	  ++ptr;
834	while (ptr < lim && !blanks[to_uchar (*ptr)])
835	  ++ptr;
836      }
837
838  if (key->skipsblanks)
839    while (ptr < lim && blanks[to_uchar (*ptr)])
840      ++ptr;
841
842  /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
843  remaining_bytes = lim - ptr;
844  if (schar < remaining_bytes)
845    ptr += schar;
846  else
847    ptr = lim;
848
849  return ptr;
850}
851
852/* Return the limit of (a pointer to the first character after) the field
853   in LINE specified by KEY. */
854
855static char *
856limfield (const struct line *line, const struct keyfield *key)
857{
858  char *ptr = line->text, *lim = ptr + line->length - 1;
859  size_t eword = key->eword, echar = key->echar;
860  size_t remaining_bytes;
861
862  /* Move PTR past EWORD fields or to one past the last byte on LINE,
863     whichever comes first.  If there are more than EWORD fields, leave
864     PTR pointing at the beginning of the field having zero-based index,
865     EWORD.  If a delimiter character was specified (via -t), then that
866     `beginning' is the first character following the delimiting TAB.
867     Otherwise, leave PTR pointing at the first `blank' character after
868     the preceding field.  */
869  if (tab != TAB_DEFAULT)
870    while (ptr < lim && eword--)
871      {
872	while (ptr < lim && *ptr != tab)
873	  ++ptr;
874	if (ptr < lim && (eword | echar))
875	  ++ptr;
876      }
877  else
878    while (ptr < lim && eword--)
879      {
880	while (ptr < lim && blanks[to_uchar (*ptr)])
881	  ++ptr;
882	while (ptr < lim && !blanks[to_uchar (*ptr)])
883	  ++ptr;
884      }
885
886#ifdef POSIX_UNSPECIFIED
887  /* The following block of code makes GNU sort incompatible with
888     standard Unix sort, so it's ifdef'd out for now.
889     The POSIX spec isn't clear on how to interpret this.
890     FIXME: request clarification.
891
892     From: kwzh@gnu.ai.mit.edu (Karl Heuer)
893     Date: Thu, 30 May 96 12:20:41 -0400
894     [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
895
896     [...]I believe I've found another bug in `sort'.
897
898     $ cat /tmp/sort.in
899     a b c 2 d
900     pq rs 1 t
901     $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
902     a b c 2 d
903     pq rs 1 t
904     $ /bin/sort -k1.7,1.7 </tmp/sort.in
905     pq rs 1 t
906     a b c 2 d
907
908     Unix sort produced the answer I expected: sort on the single character
909     in column 7.  GNU sort produced different results, because it disagrees
910     on the interpretation of the key-end spec "M.N".  Unix sort reads this
911     as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
912     "skip M-1 fields, then either N-1 characters or the rest of the current
913     field, whichever comes first".  This extra clause applies only to
914     key-ends, not key-starts.
915     */
916
917  /* Make LIM point to the end of (one byte past) the current field.  */
918  if (tab != TAB_DEFAULT)
919    {
920      char *newlim;
921      newlim = memchr (ptr, tab, lim - ptr);
922      if (newlim)
923	lim = newlim;
924    }
925  else
926    {
927      char *newlim;
928      newlim = ptr;
929      while (newlim < lim && blanks[to_uchar (*newlim)])
930	++newlim;
931      while (newlim < lim && !blanks[to_uchar (*newlim)])
932	++newlim;
933      lim = newlim;
934    }
935#endif
936
937  /* If we're ignoring leading blanks when computing the End
938     of the field, don't start counting bytes until after skipping
939     past any leading blanks. */
940  if (key->skipeblanks)
941    while (ptr < lim && blanks[to_uchar (*ptr)])
942      ++ptr;
943
944  /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
945  remaining_bytes = lim - ptr;
946  if (echar < remaining_bytes)
947    ptr += echar;
948  else
949    ptr = lim;
950
951  return ptr;
952}
953
954/* Fill BUF reading from FP, moving buf->left bytes from the end
955   of buf->buf to the beginning first.  If EOF is reached and the
956   file wasn't terminated by a newline, supply one.  Set up BUF's line
957   table too.  FILE is the name of the file corresponding to FP.
958   Return true if some input was read.  */
959
960static bool
961fillbuf (struct buffer *buf, FILE *fp, char const *file)
962{
963  struct keyfield const *key = keylist;
964  char eol = eolchar;
965  size_t line_bytes = buf->line_bytes;
966  size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
967
968  if (buf->eof)
969    return false;
970
971  if (buf->used != buf->left)
972    {
973      memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
974      buf->used = buf->left;
975      buf->nlines = 0;
976    }
977
978  for (;;)
979    {
980      char *ptr = buf->buf + buf->used;
981      struct line *linelim = buffer_linelim (buf);
982      struct line *line = linelim - buf->nlines;
983      size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
984      char *line_start = buf->nlines ? line->text + line->length : buf->buf;
985
986      while (line_bytes + 1 < avail)
987	{
988	  /* Read as many bytes as possible, but do not read so many
989	     bytes that there might not be enough room for the
990	     corresponding line array.  The worst case is when the
991	     rest of the input file consists entirely of newlines,
992	     except that the last byte is not a newline.  */
993	  size_t readsize = (avail - 1) / (line_bytes + 1);
994	  size_t bytes_read = fread (ptr, 1, readsize, fp);
995	  char *ptrlim = ptr + bytes_read;
996	  char *p;
997	  avail -= bytes_read;
998
999	  if (bytes_read != readsize)
1000	    {
1001	      if (ferror (fp))
1002		die (_("read failed"), file);
1003	      if (feof (fp))
1004		{
1005		  buf->eof = true;
1006		  if (buf->buf == ptrlim)
1007		    return false;
1008		  if (ptrlim[-1] != eol)
1009		    *ptrlim++ = eol;
1010		}
1011	    }
1012
1013	  /* Find and record each line in the just-read input.  */
1014	  while ((p = memchr (ptr, eol, ptrlim - ptr)))
1015	    {
1016	      ptr = p + 1;
1017	      line--;
1018	      line->text = line_start;
1019	      line->length = ptr - line_start;
1020	      mergesize = MAX (mergesize, line->length);
1021	      avail -= line_bytes;
1022
1023	      if (key)
1024		{
1025		  /* Precompute the position of the first key for
1026		     efficiency.  */
1027		  line->keylim = (key->eword == SIZE_MAX
1028				  ? p
1029				  : limfield (line, key));
1030
1031		  if (key->sword != SIZE_MAX)
1032		    line->keybeg = begfield (line, key);
1033		  else
1034		    {
1035		      if (key->skipsblanks)
1036			while (blanks[to_uchar (*line_start)])
1037			  line_start++;
1038		      line->keybeg = line_start;
1039		    }
1040		}
1041
1042	      line_start = ptr;
1043	    }
1044
1045	  ptr = ptrlim;
1046	  if (buf->eof)
1047	    break;
1048	}
1049
1050      buf->used = ptr - buf->buf;
1051      buf->nlines = buffer_linelim (buf) - line;
1052      if (buf->nlines != 0)
1053	{
1054	  buf->left = ptr - line_start;
1055	  merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1056	  return true;
1057	}
1058
1059      /* The current input line is too long to fit in the buffer.
1060	 Double the buffer size and try again.  */
1061      buf->buf = X2REALLOC (buf->buf, &buf->alloc);
1062    }
1063}
1064
1065/* Compare strings A and B as numbers without explicitly converting them to
1066   machine numbers.  Comparatively slow for short strings, but asymptotically
1067   hideously fast. */
1068
1069static int
1070numcompare (const char *a, const char *b)
1071{
1072  while (blanks[to_uchar (*a)])
1073    a++;
1074  while (blanks[to_uchar (*b)])
1075    b++;
1076
1077  return strnumcmp (a, b, decimal_point, thousands_sep);
1078}
1079
1080static int
1081general_numcompare (const char *sa, const char *sb)
1082{
1083  /* FIXME: add option to warn about failed conversions.  */
1084  /* FIXME: maybe add option to try expensive FP conversion
1085     only if A and B can't be compared more cheaply/accurately.  */
1086
1087  char *ea;
1088  char *eb;
1089  double a = strtod (sa, &ea);
1090  double b = strtod (sb, &eb);
1091
1092  /* Put conversion errors at the start of the collating sequence.  */
1093  if (sa == ea)
1094    return sb == eb ? 0 : -1;
1095  if (sb == eb)
1096    return 1;
1097
1098  /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1099     conversion errors but before numbers; sort them by internal
1100     bit-pattern, for lack of a more portable alternative.  */
1101  return (a < b ? -1
1102	  : a > b ? 1
1103	  : a == b ? 0
1104	  : b == b ? -1
1105	  : a == a ? 1
1106	  : memcmp ((char *) &a, (char *) &b, sizeof a));
1107}
1108
1109/* Return an integer in 1..12 of the month name MONTH with length LEN.
1110   Return 0 if the name in S is not recognized.  */
1111
1112static int
1113getmonth (char const *month, size_t len)
1114{
1115  size_t lo = 0;
1116  size_t hi = MONTHS_PER_YEAR;
1117  char const *monthlim = month + len;
1118
1119  for (;;)
1120    {
1121      if (month == monthlim)
1122	return 0;
1123      if (!blanks[to_uchar (*month)])
1124	break;
1125      ++month;
1126    }
1127
1128  do
1129    {
1130      size_t ix = (lo + hi) / 2;
1131      char const *m = month;
1132      char const *n = monthtab[ix].name;
1133
1134      for (;; m++, n++)
1135	{
1136	  if (!*n)
1137	    return monthtab[ix].val;
1138	  if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
1139	    {
1140	      hi = ix;
1141	      break;
1142	    }
1143	  else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
1144	    {
1145	      lo = ix + 1;
1146	      break;
1147	    }
1148	}
1149    }
1150  while (lo < hi);
1151
1152  return 0;
1153}
1154
1155/* Compare two lines A and B trying every key in sequence until there
1156   are no more keys or a difference is found. */
1157
1158static int
1159keycompare (const struct line *a, const struct line *b)
1160{
1161  struct keyfield const *key = keylist;
1162
1163  /* For the first iteration only, the key positions have been
1164     precomputed for us. */
1165  char *texta = a->keybeg;
1166  char *textb = b->keybeg;
1167  char *lima = a->keylim;
1168  char *limb = b->keylim;
1169
1170  int diff;
1171
1172  for (;;)
1173    {
1174      char const *translate = key->translate;
1175      bool const *ignore = key->ignore;
1176
1177      /* Find the lengths. */
1178      size_t lena = lima <= texta ? 0 : lima - texta;
1179      size_t lenb = limb <= textb ? 0 : limb - textb;
1180
1181      /* Actually compare the fields. */
1182      if (key->numeric | key->general_numeric)
1183	{
1184	  char savea = *lima, saveb = *limb;
1185
1186	  *lima = *limb = '\0';
1187	  diff = ((key->numeric ? numcompare : general_numcompare)
1188		  (texta, textb));
1189	  *lima = savea, *limb = saveb;
1190	}
1191      else if (key->month)
1192	diff = getmonth (texta, lena) - getmonth (textb, lenb);
1193      /* Sorting like this may become slow, so in a simple locale the user
1194	 can select a faster sort that is similar to ascii sort.  */
1195      else if (hard_LC_COLLATE)
1196	{
1197	  if (ignore || translate)
1198	    {
1199	      char buf[4000];
1200	      size_t size = lena + 1 + lenb + 1;
1201	      char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
1202	      char *copy_b = copy_a + lena + 1;
1203	      size_t new_len_a, new_len_b, i;
1204
1205	      /* Ignore and/or translate chars before comparing.  */
1206	      for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1207		{
1208		  if (i < lena)
1209		    {
1210		      copy_a[new_len_a] = (translate
1211					   ? translate[to_uchar (texta[i])]
1212					   : texta[i]);
1213		      if (!ignore || !ignore[to_uchar (texta[i])])
1214			++new_len_a;
1215		    }
1216		  if (i < lenb)
1217		    {
1218		      copy_b[new_len_b] = (translate
1219					   ? translate[to_uchar (textb[i])]
1220					   : textb [i]);
1221		      if (!ignore || !ignore[to_uchar (textb[i])])
1222			++new_len_b;
1223		    }
1224		}
1225
1226	      diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1227
1228	      if (sizeof buf < size)
1229		free (copy_a);
1230	    }
1231	  else if (lena == 0)
1232	    diff = - NONZERO (lenb);
1233	  else if (lenb == 0)
1234	    goto greater;
1235	  else
1236	    diff = xmemcoll (texta, lena, textb, lenb);
1237	}
1238      else if (ignore)
1239	{
1240#define CMP_WITH_IGNORE(A, B)						\
1241  do									\
1242    {									\
1243	  for (;;)							\
1244	    {								\
1245	      while (texta < lima && ignore[to_uchar (*texta)])		\
1246		++texta;						\
1247	      while (textb < limb && ignore[to_uchar (*textb)])		\
1248		++textb;						\
1249	      if (! (texta < lima && textb < limb))			\
1250		break;							\
1251	      diff = to_uchar (A) - to_uchar (B);			\
1252	      if (diff)							\
1253		goto not_equal;						\
1254	      ++texta;							\
1255	      ++textb;							\
1256	    }								\
1257									\
1258	  diff = (texta < lima) - (textb < limb);			\
1259    }									\
1260  while (0)
1261
1262	  if (translate)
1263	    CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1264			     translate[to_uchar (*textb)]);
1265	  else
1266	    CMP_WITH_IGNORE (*texta, *textb);
1267	}
1268      else if (lena == 0)
1269	diff = - NONZERO (lenb);
1270      else if (lenb == 0)
1271	goto greater;
1272      else
1273	{
1274	  if (translate)
1275	    {
1276	      while (texta < lima && textb < limb)
1277		{
1278		  diff = (to_uchar (translate[to_uchar (*texta++)])
1279			  - to_uchar (translate[to_uchar (*textb++)]));
1280		  if (diff)
1281		    goto not_equal;
1282		}
1283	    }
1284	  else
1285	    {
1286	      diff = memcmp (texta, textb, MIN (lena, lenb));
1287	      if (diff)
1288		goto not_equal;
1289	    }
1290	  diff = lena < lenb ? -1 : lena != lenb;
1291	}
1292
1293      if (diff)
1294	goto not_equal;
1295
1296      key = key->next;
1297      if (! key)
1298	break;
1299
1300      /* Find the beginning and limit of the next field.  */
1301      if (key->eword != SIZE_MAX)
1302	lima = limfield (a, key), limb = limfield (b, key);
1303      else
1304	lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1305
1306      if (key->sword != SIZE_MAX)
1307	texta = begfield (a, key), textb = begfield (b, key);
1308      else
1309	{
1310	  texta = a->text, textb = b->text;
1311	  if (key->skipsblanks)
1312	    {
1313	      while (texta < lima && blanks[to_uchar (*texta)])
1314		++texta;
1315	      while (textb < limb && blanks[to_uchar (*textb)])
1316		++textb;
1317	    }
1318	}
1319    }
1320
1321  return 0;
1322
1323 greater:
1324  diff = 1;
1325 not_equal:
1326  return key->reverse ? -diff : diff;
1327}
1328
1329/* Compare two lines A and B, returning negative, zero, or positive
1330   depending on whether A compares less than, equal to, or greater than B. */
1331
1332static int
1333compare (const struct line *a, const struct line *b)
1334{
1335  int diff;
1336  size_t alen, blen;
1337
1338  /* First try to compare on the specified keys (if any).
1339     The only two cases with no key at all are unadorned sort,
1340     and unadorned sort -r. */
1341  if (keylist)
1342    {
1343      diff = keycompare (a, b);
1344      if (diff | unique | stable)
1345	return diff;
1346    }
1347
1348  /* If the keys all compare equal (or no keys were specified)
1349     fall through to the default comparison.  */
1350  alen = a->length - 1, blen = b->length - 1;
1351
1352  if (alen == 0)
1353    diff = - NONZERO (blen);
1354  else if (blen == 0)
1355    diff = 1;
1356  else if (hard_LC_COLLATE)
1357    diff = xmemcoll (a->text, alen, b->text, blen);
1358  else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
1359    diff = alen < blen ? -1 : alen != blen;
1360
1361  return reverse ? -diff : diff;
1362}
1363
1364/* Check that the lines read from FILE_NAME come in order.  Print a
1365   diagnostic (FILE_NAME, line number, contents of line) to stderr and return
1366   false if they are not in order.  Otherwise, print no diagnostic
1367   and return true.  */
1368
1369static bool
1370check (char const *file_name)
1371{
1372  FILE *fp = xfopen (file_name, "r");
1373  struct buffer buf;		/* Input buffer. */
1374  struct line temp;		/* Copy of previous line. */
1375  size_t alloc = 0;
1376  uintmax_t line_number = 0;
1377  struct keyfield const *key = keylist;
1378  bool nonunique = ! unique;
1379  bool ordered = true;
1380
1381  initbuf (&buf, sizeof (struct line),
1382	   MAX (merge_buffer_size, sort_size));
1383  temp.text = NULL;
1384
1385  while (fillbuf (&buf, fp, file_name))
1386    {
1387      struct line const *line = buffer_linelim (&buf);
1388      struct line const *linebase = line - buf.nlines;
1389
1390      /* Make sure the line saved from the old buffer contents is
1391	 less than or equal to the first line of the new buffer. */
1392      if (alloc && nonunique <= compare (&temp, line - 1))
1393	{
1394	found_disorder:
1395	  {
1396	    struct line const *disorder_line = line - 1;
1397	    uintmax_t disorder_line_number =
1398	      buffer_linelim (&buf) - disorder_line + line_number;
1399	    char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
1400	    fprintf (stderr, _("%s: %s:%s: disorder: "),
1401		     program_name, file_name,
1402		     umaxtostr (disorder_line_number, hr_buf));
1403	    write_bytes (disorder_line->text, disorder_line->length, stderr,
1404			 _("standard error"));
1405	    ordered = false;
1406	    break;
1407	  }
1408	}
1409
1410      /* Compare each line in the buffer with its successor.  */
1411      while (linebase < --line)
1412	if (nonunique <= compare (line, line - 1))
1413	  goto found_disorder;
1414
1415      line_number += buf.nlines;
1416
1417      /* Save the last line of the buffer.  */
1418      if (alloc < line->length)
1419	{
1420	  do
1421	    {
1422	      alloc *= 2;
1423	      if (! alloc)
1424		{
1425		  alloc = line->length;
1426		  break;
1427		}
1428	    }
1429	  while (alloc < line->length);
1430
1431	  temp.text = xrealloc (temp.text, alloc);
1432	}
1433      memcpy (temp.text, line->text, line->length);
1434      temp.length = line->length;
1435      if (key)
1436	{
1437	  temp.keybeg = temp.text + (line->keybeg - line->text);
1438	  temp.keylim = temp.text + (line->keylim - line->text);
1439	}
1440    }
1441
1442  xfclose (fp, file_name);
1443  free (buf.buf);
1444  free (temp.text);
1445  return ordered;
1446}
1447
1448/* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
1449   files (all of which are at the start of the FILES array), and
1450   NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
1451   Close input and output files before returning.
1452   OUTPUT_FILE gives the name of the output file.  If it is NULL,
1453   the output file is standard output.  If OFP is NULL, the output
1454   file has not been opened yet (or written to, if standard output).  */
1455
1456static void
1457mergefps (char **files, size_t ntemps, size_t nfiles,
1458	  FILE *ofp, char const *output_file)
1459{
1460  FILE *fps[NMERGE];		/* Input streams for each file.  */
1461  struct buffer buffer[NMERGE];	/* Input buffers for each file. */
1462  struct line saved;		/* Saved line storage for unique check. */
1463  struct line const *savedline = NULL;
1464				/* &saved if there is a saved line. */
1465  size_t savealloc = 0;		/* Size allocated for the saved line. */
1466  struct line const *cur[NMERGE]; /* Current line in each line table. */
1467  struct line const *base[NMERGE]; /* Base of each line table.  */
1468  size_t ord[NMERGE];		/* Table representing a permutation of fps,
1469				   such that cur[ord[0]] is the smallest line
1470				   and will be next output. */
1471  size_t i;
1472  size_t j;
1473  size_t t;
1474  struct keyfield const *key = keylist;
1475  saved.text = NULL;
1476
1477  /* Read initial lines from each input file. */
1478  for (i = 0; i < nfiles; )
1479    {
1480      fps[i] = xfopen (files[i], "r");
1481      initbuf (&buffer[i], sizeof (struct line),
1482	       MAX (merge_buffer_size, sort_size / nfiles));
1483      if (fillbuf (&buffer[i], fps[i], files[i]))
1484	{
1485	  struct line const *linelim = buffer_linelim (&buffer[i]);
1486	  cur[i] = linelim - 1;
1487	  base[i] = linelim - buffer[i].nlines;
1488	  i++;
1489	}
1490      else
1491	{
1492	  /* fps[i] is empty; eliminate it from future consideration.  */
1493	  xfclose (fps[i], files[i]);
1494	  if (i < ntemps)
1495	    {
1496	      ntemps--;
1497	      zaptemp (files[i]);
1498	    }
1499	  free (buffer[i].buf);
1500	  --nfiles;
1501	  for (j = i; j < nfiles; ++j)
1502	    files[j] = files[j + 1];
1503	}
1504    }
1505
1506  if (! ofp)
1507    ofp = xfopen (output_file, "w");
1508
1509  /* Set up the ord table according to comparisons among input lines.
1510     Since this only reorders two items if one is strictly greater than
1511     the other, it is stable. */
1512  for (i = 0; i < nfiles; ++i)
1513    ord[i] = i;
1514  for (i = 1; i < nfiles; ++i)
1515    if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
1516      t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1517
1518  /* Repeatedly output the smallest line until no input remains. */
1519  while (nfiles)
1520    {
1521      struct line const *smallest = cur[ord[0]];
1522
1523      /* If uniquified output is turned on, output only the first of
1524	 an identical series of lines. */
1525      if (unique)
1526	{
1527	  if (savedline && compare (savedline, smallest))
1528	    {
1529	      savedline = NULL;
1530	      write_bytes (saved.text, saved.length, ofp, output_file);
1531	    }
1532	  if (!savedline)
1533	    {
1534	      savedline = &saved;
1535	      if (savealloc < smallest->length)
1536		{
1537		  do
1538		    if (! savealloc)
1539		      {
1540			savealloc = smallest->length;
1541			break;
1542		      }
1543		  while ((savealloc *= 2) < smallest->length);
1544
1545		  saved.text = xrealloc (saved.text, savealloc);
1546		}
1547	      saved.length = smallest->length;
1548	      memcpy (saved.text, smallest->text, saved.length);
1549	      if (key)
1550		{
1551		  saved.keybeg =
1552		    saved.text + (smallest->keybeg - smallest->text);
1553		  saved.keylim =
1554		    saved.text + (smallest->keylim - smallest->text);
1555		}
1556	    }
1557	}
1558      else
1559	write_bytes (smallest->text, smallest->length, ofp, output_file);
1560
1561      /* Check if we need to read more lines into core. */
1562      if (base[ord[0]] < smallest)
1563	cur[ord[0]] = smallest - 1;
1564      else
1565	{
1566	  if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
1567	    {
1568	      struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
1569	      cur[ord[0]] = linelim - 1;
1570	      base[ord[0]] = linelim - buffer[ord[0]].nlines;
1571	    }
1572	  else
1573	    {
1574	      /* We reached EOF on fps[ord[0]].  */
1575	      for (i = 1; i < nfiles; ++i)
1576		if (ord[i] > ord[0])
1577		  --ord[i];
1578	      --nfiles;
1579	      xfclose (fps[ord[0]], files[ord[0]]);
1580	      if (ord[0] < ntemps)
1581		{
1582		  ntemps--;
1583		  zaptemp (files[ord[0]]);
1584		}
1585	      free (buffer[ord[0]].buf);
1586	      for (i = ord[0]; i < nfiles; ++i)
1587		{
1588		  fps[i] = fps[i + 1];
1589		  files[i] = files[i + 1];
1590		  buffer[i] = buffer[i + 1];
1591		  cur[i] = cur[i + 1];
1592		  base[i] = base[i + 1];
1593		}
1594	      for (i = 0; i < nfiles; ++i)
1595		ord[i] = ord[i + 1];
1596	      continue;
1597	    }
1598	}
1599
1600      /* The new line just read in may be larger than other lines
1601	 already in main memory; push it back in the queue until we
1602	 encounter a line larger than it.  Optimize for the common
1603	 case where the new line is smallest.  */
1604      {
1605	size_t lo = 1;
1606	size_t hi = nfiles;
1607	size_t probe = lo;
1608	size_t ord0 = ord[0];
1609	size_t count_of_smaller_lines;
1610
1611	while (lo < hi)
1612	  {
1613	    int cmp = compare (cur[ord0], cur[ord[probe]]);
1614	    if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
1615	      hi = probe;
1616	    else
1617	      lo = probe + 1;
1618	    probe = (lo + hi) / 2;
1619	  }
1620
1621	count_of_smaller_lines = lo - 1;
1622	for (j = 0; j < count_of_smaller_lines; j++)
1623	  ord[j] = ord[j + 1];
1624	ord[count_of_smaller_lines] = ord0;
1625      }
1626    }
1627
1628  if (unique && savedline)
1629    {
1630      write_bytes (saved.text, saved.length, ofp, output_file);
1631      free (saved.text);
1632    }
1633
1634  xfclose (ofp, output_file);
1635}
1636
1637/* Merge into T the two sorted arrays of lines LO (with NLO members)
1638   and HI (with NHI members).  T, LO, and HI point just past their
1639   respective arrays, and the arrays are in reverse order.  NLO and
1640   NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
1641
1642static inline void
1643mergelines (struct line *t,
1644	    struct line const *lo, size_t nlo,
1645	    struct line const *hi, size_t nhi)
1646{
1647  for (;;)
1648    if (compare (lo - 1, hi - 1) <= 0)
1649      {
1650	*--t = *--lo;
1651	if (! --nlo)
1652	  {
1653	    /* HI - NHI equalled T - (NLO + NHI) when this function
1654	       began.  Therefore HI must equal T now, and there is no
1655	       need to copy from HI to T.  */
1656	    return;
1657	  }
1658      }
1659    else
1660      {
1661	*--t = *--hi;
1662	if (! --nhi)
1663	  {
1664	    do
1665	      *--t = *--lo;
1666	    while (--nlo);
1667
1668	    return;
1669	  }
1670      }
1671}
1672
1673/* Sort the array LINES with NLINES members, using TEMP for temporary space.
1674   NLINES must be at least 2.
1675   The input and output arrays are in reverse order, and LINES and
1676   TEMP point just past the end of their respective arrays.
1677
1678   Use a recursive divide-and-conquer algorithm, in the style
1679   suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
1680   the optimization suggested by exercise 5.2.4-10; this requires room
1681   for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
1682   writes that this memory optimization was originally published by
1683   D. A. Bell, Comp J. 1 (1958), 75.  */
1684
1685static void
1686sortlines (struct line *lines, size_t nlines, struct line *temp)
1687{
1688  if (nlines == 2)
1689    {
1690      if (0 < compare (&lines[-1], &lines[-2]))
1691	{
1692	  struct line tmp = lines[-1];
1693	  lines[-1] = lines[-2];
1694	  lines[-2] = tmp;
1695	}
1696    }
1697  else
1698    {
1699      size_t nlo = nlines / 2;
1700      size_t nhi = nlines - nlo;
1701      struct line *lo = lines;
1702      struct line *hi = lines - nlo;
1703      struct line *sorted_lo = temp;
1704
1705      sortlines (hi, nhi, temp);
1706      if (1 < nlo)
1707	sortlines_temp (lo, nlo, sorted_lo);
1708      else
1709	sorted_lo[-1] = lo[-1];
1710
1711      mergelines (lines, sorted_lo, nlo, hi, nhi);
1712    }
1713}
1714
1715/* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
1716   rather than sorting in place.  */
1717
1718static void
1719sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
1720{
1721  if (nlines == 2)
1722    {
1723      /* Declare `swap' as int, not bool, to work around a bug
1724	 <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
1725	 in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
1726      int swap = (0 < compare (&lines[-1], &lines[-2]));
1727      temp[-1] = lines[-1 - swap];
1728      temp[-2] = lines[-2 + swap];
1729    }
1730  else
1731    {
1732      size_t nlo = nlines / 2;
1733      size_t nhi = nlines - nlo;
1734      struct line *lo = lines;
1735      struct line *hi = lines - nlo;
1736      struct line *sorted_hi = temp - nlo;
1737
1738      sortlines_temp (hi, nhi, sorted_hi);
1739      if (1 < nlo)
1740	sortlines (lo, nlo, temp);
1741
1742      mergelines (temp, lo, nlo, sorted_hi, nhi);
1743    }
1744}
1745
1746/* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
1747   the same as OUTFILE.  If found, merge the found instances (and perhaps
1748   some other files) into a temporary file so that it can in turn be
1749   merged into OUTFILE without destroying OUTFILE before it is completely
1750   read.  Return the new value of NFILES, which differs from the old if
1751   some merging occurred.
1752
1753   This test ensures that an otherwise-erroneous use like
1754   "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
1755   It's not clear that POSIX requires this nicety.
1756   Detect common error cases, but don't try to catch obscure cases like
1757   "cat ... FILE ... | sort -m -o FILE"
1758   where traditional "sort" doesn't copy the input and where
1759   people should know that they're getting into trouble anyway.
1760   Catching these obscure cases would slow down performance in
1761   common cases.  */
1762
1763static size_t
1764avoid_trashing_input (char **files, size_t ntemps, size_t nfiles,
1765		      char const *outfile)
1766{
1767  size_t i;
1768  bool got_outstat = false;
1769  struct stat outstat;
1770
1771  for (i = ntemps; i < nfiles; i++)
1772    {
1773      bool is_stdin = STREQ (files[i], "-");
1774      bool same;
1775      struct stat instat;
1776
1777      if (outfile && STREQ (outfile, files[i]) && !is_stdin)
1778	same = true;
1779      else
1780	{
1781	  if (! got_outstat)
1782	    {
1783	      if ((outfile
1784		   ? stat (outfile, &outstat)
1785		   : fstat (STDOUT_FILENO, &outstat))
1786		  != 0)
1787		break;
1788	      got_outstat = true;
1789	    }
1790
1791	  same = (((is_stdin
1792		    ? fstat (STDIN_FILENO, &instat)
1793		    : stat (files[i], &instat))
1794		   == 0)
1795		  && SAME_INODE (instat, outstat));
1796	}
1797
1798      if (same)
1799	{
1800	  FILE *tftp;
1801	  char *temp = create_temp_file (&tftp);
1802	  mergefps (&files[i], 0, nfiles - i, tftp, temp);
1803	  files[i] = temp;
1804	  return i + 1;
1805	}
1806    }
1807
1808  return nfiles;
1809}
1810
1811/* Merge the input FILES.  NTEMPS is the number of files at the
1812   start of FILES that are temporary; it is zero at the top level.
1813   NFILES is the total number of files.  Put the output in
1814   OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
1815
1816static void
1817merge (char **files, size_t ntemps, size_t nfiles, char const *output_file)
1818{
1819  while (NMERGE < nfiles)
1820    {
1821      /* Number of input files processed so far.  */
1822      size_t in;
1823
1824      /* Number of output files generated so far.  */
1825      size_t out;
1826
1827      /* nfiles % NMERGE; this counts input files that are left over
1828	 after all full-sized merges have been done.  */
1829      size_t remainder;
1830
1831      /* Number of easily-available slots at the next loop iteration.  */
1832      size_t cheap_slots;
1833
1834      /* Do as many NMERGE-size merges as possible.  */
1835      for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
1836	{
1837	  FILE *tfp;
1838	  char *temp = create_temp_file (&tfp);
1839	  size_t nt = MIN (ntemps, NMERGE);
1840	  ntemps -= nt;
1841	  mergefps (&files[in], nt, NMERGE, tfp, temp);
1842	  files[out] = temp;
1843	}
1844
1845      remainder = nfiles - in;
1846      cheap_slots = NMERGE - out % NMERGE;
1847
1848      if (cheap_slots < remainder)
1849	{
1850	  /* So many files remain that they can't all be put into the last
1851	     NMERGE-sized output window.  Do one more merge.  Merge as few
1852	     files as possible, to avoid needless I/O.  */
1853	  size_t nshortmerge = remainder - cheap_slots + 1;
1854	  FILE *tfp;
1855	  char *temp = create_temp_file (&tfp);
1856	  size_t nt = MIN (ntemps, nshortmerge);
1857	  ntemps -= nt;
1858	  mergefps (&files[in], nt, nshortmerge, tfp, temp);
1859	  files[out++] = temp;
1860	  in += nshortmerge;
1861	}
1862
1863      /* Put the remaining input files into the last NMERGE-sized output
1864	 window, so they will be merged in the next pass.  */
1865      memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
1866      ntemps += out;
1867      nfiles -= in - out;
1868    }
1869
1870  nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
1871  mergefps (files, ntemps, nfiles, NULL, output_file);
1872}
1873
1874/* Sort NFILES FILES onto OUTPUT_FILE. */
1875
1876static void
1877sort (char * const *files, size_t nfiles, char const *output_file)
1878{
1879  struct buffer buf;
1880  size_t ntemps = 0;
1881  bool output_file_created = false;
1882
1883  buf.alloc = 0;
1884
1885  while (nfiles)
1886    {
1887      char const *temp_output;
1888      char const *file = *files;
1889      FILE *fp = xfopen (file, "r");
1890      FILE *tfp;
1891      size_t bytes_per_line = (2 * sizeof (struct line)
1892			       - sizeof (struct line) / 2);
1893
1894      if (! buf.alloc)
1895	initbuf (&buf, bytes_per_line,
1896		 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
1897      buf.eof = false;
1898      files++;
1899      nfiles--;
1900
1901      while (fillbuf (&buf, fp, file))
1902	{
1903	  struct line *line;
1904	  struct line *linebase;
1905
1906	  if (buf.eof && nfiles
1907	      && (bytes_per_line + 1
1908		  < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
1909	    {
1910	      /* End of file, but there is more input and buffer room.
1911		 Concatenate the next input file; this is faster in
1912		 the usual case.  */
1913	      buf.left = buf.used;
1914	      break;
1915	    }
1916
1917	  line = buffer_linelim (&buf);
1918	  linebase = line - buf.nlines;
1919	  if (1 < buf.nlines)
1920	    sortlines (line, buf.nlines, linebase);
1921	  if (buf.eof && !nfiles && !ntemps && !buf.left)
1922	    {
1923	      xfclose (fp, file);
1924	      tfp = xfopen (output_file, "w");
1925	      temp_output = output_file;
1926	      output_file_created = true;
1927	    }
1928	  else
1929	    {
1930	      ++ntemps;
1931	      temp_output = create_temp_file (&tfp);
1932	    }
1933
1934	  do
1935	    {
1936	      line--;
1937	      write_bytes (line->text, line->length, tfp, temp_output);
1938	      if (unique)
1939		while (linebase < line && compare (line, line - 1) == 0)
1940		  line--;
1941	    }
1942	  while (linebase < line);
1943
1944	  xfclose (tfp, temp_output);
1945
1946	  if (output_file_created)
1947	    goto finish;
1948	}
1949      xfclose (fp, file);
1950    }
1951
1952 finish:
1953  free (buf.buf);
1954
1955  if (! output_file_created)
1956    {
1957      size_t i;
1958      struct tempnode *node = temphead;
1959      char **tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
1960      for (i = 0; node; i++)
1961	{
1962	  tempfiles[i] = node->name;
1963	  node = node->next;
1964	}
1965      merge (tempfiles, ntemps, ntemps, output_file);
1966      free (tempfiles);
1967    }
1968}
1969
1970/* Insert key KEY at the end of the key list.  */
1971
1972static void
1973insertkey (struct keyfield *key)
1974{
1975  struct keyfield **p;
1976
1977  for (p = &keylist; *p; p = &(*p)->next)
1978    continue;
1979  *p = key;
1980  key->next = NULL;
1981}
1982
1983/* Report a bad field specification SPEC, with extra info MSGID.  */
1984
1985static void badfieldspec (char const *, char const *)
1986     ATTRIBUTE_NORETURN;
1987static void
1988badfieldspec (char const *spec, char const *msgid)
1989{
1990  error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
1991	 _(msgid), quote (spec));
1992  abort ();
1993}
1994
1995/* Parse the leading integer in STRING and store the resulting value
1996   (which must fit into size_t) into *VAL.  Return the address of the
1997   suffix after the integer.  If MSGID is NULL, return NULL after
1998   failure; otherwise, report MSGID and exit on failure.  */
1999
2000static char const *
2001parse_field_count (char const *string, size_t *val, char const *msgid)
2002{
2003  char *suffix;
2004  uintmax_t n;
2005
2006  switch (xstrtoumax (string, &suffix, 10, &n, ""))
2007    {
2008    case LONGINT_OK:
2009    case LONGINT_INVALID_SUFFIX_CHAR:
2010      *val = n;
2011      if (*val == n)
2012	break;
2013      /* Fall through.  */
2014    case LONGINT_OVERFLOW:
2015    case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2016      if (msgid)
2017	error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2018	       _(msgid), (int) (suffix - string), string);
2019      return NULL;
2020
2021    case LONGINT_INVALID:
2022      if (msgid)
2023	error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
2024	       _(msgid), quote (string));
2025      return NULL;
2026    }
2027
2028  return suffix;
2029}
2030
2031/* Handle interrupts and hangups. */
2032
2033static void
2034sighandler (int sig)
2035{
2036  if (! SA_NOCLDSTOP)
2037    signal (sig, SIG_IGN);
2038
2039  cleanup ();
2040
2041  signal (sig, SIG_DFL);
2042  raise (sig);
2043}
2044
2045/* Set the ordering options for KEY specified in S.
2046   Return the address of the first character in S that
2047   is not a valid ordering option.
2048   BLANKTYPE is the kind of blanks that 'b' should skip. */
2049
2050static char *
2051set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
2052{
2053  while (*s)
2054    {
2055      switch (*s)
2056	{
2057	case 'b':
2058	  if (blanktype == bl_start || blanktype == bl_both)
2059	    key->skipsblanks = true;
2060	  if (blanktype == bl_end || blanktype == bl_both)
2061	    key->skipeblanks = true;
2062	  break;
2063	case 'd':
2064	  key->ignore = nondictionary;
2065	  break;
2066	case 'f':
2067	  key->translate = fold_toupper;
2068	  break;
2069	case 'g':
2070	  key->general_numeric = true;
2071	  break;
2072	case 'i':
2073	  /* Option order should not matter, so don't let -i override
2074	     -d.  -d implies -i, but -i does not imply -d.  */
2075	  if (! key->ignore)
2076	    key->ignore = nonprinting;
2077	  break;
2078	case 'M':
2079	  key->month = true;
2080	  break;
2081	case 'n':
2082	  key->numeric = true;
2083	  break;
2084	case 'r':
2085	  key->reverse = true;
2086	  break;
2087	default:
2088	  return (char *) s;
2089	}
2090      ++s;
2091    }
2092  return (char *) s;
2093}
2094
2095static struct keyfield *
2096new_key (void)
2097{
2098  struct keyfield *key = xzalloc (sizeof *key);
2099  key->eword = SIZE_MAX;
2100  return key;
2101}
2102
2103int
2104main (int argc, char **argv)
2105{
2106  struct keyfield *key;
2107  struct keyfield gkey;
2108  char const *s;
2109  int c = 0;
2110  bool checkonly = false;
2111  bool mergeonly = false;
2112  size_t nfiles = 0;
2113  bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2114  bool obsolete_usage = (posix2_version () < 200112);
2115  char *minus = "-", **files;
2116  char const *outfile = NULL;
2117
2118  initialize_main (&argc, &argv);
2119  program_name = (argv[0]?argv[0]:PROGRAM_NAME);;
2120  setlocale (LC_ALL, "");
2121  bindtextdomain (PACKAGE, LOCALEDIR);
2122  textdomain (PACKAGE);
2123
2124  atexit (cleanup);
2125
2126  initialize_exit_failure (SORT_FAILURE);
2127  atexit (close_stdout);
2128
2129  hard_LC_COLLATE = hard_locale (LC_COLLATE);
2130#if HAVE_NL_LANGINFO
2131  hard_LC_TIME = hard_locale (LC_TIME);
2132#endif
2133
2134  /* Get locale's representation of the decimal point.  */
2135  {
2136    struct lconv const *locale = localeconv ();
2137
2138    /* If the locale doesn't define a decimal point, or if the decimal
2139       point is multibyte, use the C locale's decimal point.  FIXME:
2140       add support for multibyte decimal points.  */
2141    decimal_point = to_uchar (locale->decimal_point[0]);
2142    if (! decimal_point || locale->decimal_point[1])
2143      decimal_point = '.';
2144
2145    /* FIXME: add support for multibyte thousands separators.  */
2146    thousands_sep = to_uchar (*locale->thousands_sep);
2147    if (! thousands_sep || locale->thousands_sep[1])
2148      thousands_sep = -1;
2149  }
2150
2151  have_read_stdin = false;
2152  inittables ();
2153
2154  {
2155    size_t i;
2156    static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2157    enum { nsigs = sizeof sig / sizeof sig[0] };
2158
2159#if SA_NOCLDSTOP
2160    struct sigaction act;
2161
2162    sigemptyset (&caught_signals);
2163    for (i = 0; i < nsigs; i++)
2164      {
2165	sigaction (sig[i], NULL, &act);
2166	if (act.sa_handler != SIG_IGN)
2167	  sigaddset (&caught_signals, sig[i]);
2168      }
2169
2170    act.sa_handler = sighandler;
2171    act.sa_mask = caught_signals;
2172    act.sa_flags = 0;
2173
2174    for (i = 0; i < nsigs; i++)
2175      if (sigismember (&caught_signals, sig[i]))
2176	sigaction (sig[i], &act, NULL);
2177#else
2178    for (i = 0; i < nsigs; i++)
2179      if (signal (sig[i], SIG_IGN) != SIG_IGN)
2180	{
2181	  signal (sig[i], sighandler);
2182	  siginterrupt (sig[i], 1);
2183	}
2184#endif
2185  }
2186
2187  gkey.sword = gkey.eword = SIZE_MAX;
2188  gkey.ignore = NULL;
2189  gkey.translate = NULL;
2190  gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
2191  gkey.skipsblanks = gkey.skipeblanks = false;
2192
2193  files = xnmalloc (argc, sizeof *files);
2194
2195  for (;;)
2196    {
2197      /* Parse an operand as a file after "--" was seen; or if
2198	 pedantic and a file was seen, unless the POSIX version
2199	 predates 1003.1-2001 and -c was not seen and the operand is
2200	 "-o FILE" or "-oFILE".  */
2201
2202      if (c == -1
2203	  || (posixly_correct && nfiles != 0
2204	      && ! (obsolete_usage
2205		    && ! checkonly
2206		    && optind != argc && argv[optind]
2207		    && argv[optind][0] == '-' && argv[optind][1] == 'o'
2208		    && (argv[optind][2] || optind + 1 != argc)))
2209	  || ((c = getopt_long (argc, argv, short_options,
2210				long_options, NULL))
2211	      == -1))
2212	{
2213	  if (argc <= optind)
2214	    break;
2215	  files[nfiles++] = argv[optind++];
2216	}
2217      else switch (c)
2218	{
2219	case 1:
2220	  key = NULL;
2221	  if (obsolete_usage && optarg[0] == '+')
2222	    {
2223	      /* Treat +POS1 [-POS2] as a key if possible; but silently
2224		 treat an operand as a file if it is not a valid +POS1.  */
2225	      key = new_key ();
2226	      s = parse_field_count (optarg + 1, &key->sword, NULL);
2227	      if (s && *s == '.')
2228		s = parse_field_count (s + 1, &key->schar, NULL);
2229	      if (! (key->sword | key->schar))
2230		key->sword = SIZE_MAX;
2231	      if (! s || *set_ordering (s, key, bl_start))
2232		{
2233		  free (key);
2234		  key = NULL;
2235		}
2236	      else
2237		{
2238		  if (optind != argc && argv[optind] && argv[optind][0] == '-'
2239		      && ISDIGIT (argv[optind][1]))
2240		    {
2241		      char const *optarg1 = argv[optind++];
2242		      s = parse_field_count (optarg1 + 1, &key->eword,
2243					     N_("invalid number after `-'"));
2244		      if (*s == '.')
2245			s = parse_field_count (s + 1, &key->echar,
2246					       N_("invalid number after `.'"));
2247		      if (*set_ordering (s, key, bl_end))
2248			badfieldspec (optarg1,
2249				      N_("stray character in field spec"));
2250		    }
2251		  insertkey (key);
2252		}
2253	    }
2254	  if (! key)
2255	    files[nfiles++] = optarg;
2256	  break;
2257
2258	case 'b':
2259	case 'd':
2260	case 'f':
2261	case 'g':
2262	case 'i':
2263	case 'M':
2264	case 'n':
2265	case 'r':
2266	  {
2267	    char str[2];
2268	    str[0] = c;
2269	    str[1] = '\0';
2270	    set_ordering (str, &gkey, bl_both);
2271	  }
2272	  break;
2273
2274	case 'c':
2275	  checkonly = true;
2276	  break;
2277
2278	case 'k':
2279	  key = new_key ();
2280
2281	  /* Get POS1. */
2282	  s = parse_field_count (optarg, &key->sword,
2283				 N_("invalid number at field start"));
2284	  if (! key->sword--)
2285	    {
2286	      /* Provoke with `sort -k0' */
2287	      badfieldspec (optarg, N_("field number is zero"));
2288	    }
2289	  if (*s == '.')
2290	    {
2291	      s = parse_field_count (s + 1, &key->schar,
2292				     N_("invalid number after `.'"));
2293	      if (! key->schar--)
2294		{
2295		  /* Provoke with `sort -k1.0' */
2296		  badfieldspec (optarg, N_("character offset is zero"));
2297		}
2298	    }
2299	  if (! (key->sword | key->schar))
2300	    key->sword = SIZE_MAX;
2301	  s = set_ordering (s, key, bl_start);
2302	  if (*s != ',')
2303	    {
2304	      key->eword = SIZE_MAX;
2305	      key->echar = 0;
2306	    }
2307	  else
2308	    {
2309	      /* Get POS2. */
2310	      s = parse_field_count (s + 1, &key->eword,
2311				     N_("invalid number after `,'"));
2312	      if (! key->eword--)
2313		{
2314		  /* Provoke with `sort -k1,0' */
2315		  badfieldspec (optarg, N_("field number is zero"));
2316		}
2317	      if (*s == '.')
2318		s = parse_field_count (s + 1, &key->echar,
2319				       N_("invalid number after `.'"));
2320	      else
2321		{
2322		  /* `-k 2,3' is equivalent to `+1 -3'.  */
2323		  key->eword++;
2324		}
2325	      s = set_ordering (s, key, bl_end);
2326	    }
2327	  if (*s)
2328	    badfieldspec (optarg, N_("stray character in field spec"));
2329	  insertkey (key);
2330	  break;
2331
2332	case 'm':
2333	  mergeonly = true;
2334	  break;
2335
2336	case 'o':
2337	  if (outfile && !STREQ (outfile, optarg))
2338	    error (SORT_FAILURE, 0, _("multiple output files specified"));
2339	  outfile = optarg;
2340	  break;
2341
2342	case 's':
2343	  stable = true;
2344	  break;
2345
2346	case 'S':
2347	  specify_sort_size (optarg);
2348	  break;
2349
2350	case 't':
2351	  {
2352	    char newtab = optarg[0];
2353	    if (! newtab)
2354	      error (SORT_FAILURE, 0, _("empty tab"));
2355	    if (optarg[1])
2356	      {
2357		if (STREQ (optarg, "\\0"))
2358		  newtab = '\0';
2359		else
2360		  {
2361		    /* Provoke with `sort -txx'.  Complain about
2362		       "multi-character tab" instead of "multibyte tab", so
2363		       that the diagnostic's wording does not need to be
2364		       changed once multibyte characters are supported.  */
2365		    error (SORT_FAILURE, 0, _("multi-character tab %s"),
2366			   quote (optarg));
2367		  }
2368	      }
2369	    if (tab != TAB_DEFAULT && tab != newtab)
2370	      error (SORT_FAILURE, 0, _("incompatible tabs"));
2371	    tab = newtab;
2372	  }
2373	  break;
2374
2375	case 'T':
2376	  add_temp_dir (optarg);
2377	  break;
2378
2379	case 'u':
2380	  unique = true;
2381	  break;
2382
2383	case 'y':
2384	  /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
2385	     through Solaris 7.  It is also accepted by many non-Solaris
2386	     "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
2387	     -y is marked as obsolete starting with Solaris 8 (1999), but is
2388	     still accepted as of Solaris 10 prerelease (2004).
2389
2390	     Solaris 2.5.1 "sort -y 100" reads the input file "100", but
2391	     emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
2392	     and which in general ignores the argument after "-y" if it
2393	     consists entirely of digits (it can even be empty).  */
2394	  if (optarg == argv[optind - 1])
2395	    {
2396	      char const *p;
2397	      for (p = optarg; ISDIGIT (*p); p++)
2398		continue;
2399	      optind -= (*p != '\0');
2400	    }
2401	  break;
2402
2403	case 'z':
2404	  eolchar = 0;
2405	  break;
2406
2407	case_GETOPT_HELP_CHAR;
2408
2409	case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2410
2411	default:
2412	  usage (SORT_FAILURE);
2413	}
2414    }
2415
2416  /* Inheritance of global options to individual keys. */
2417  for (key = keylist; key; key = key->next)
2418    if (! (key->ignore || key->translate
2419	   || (key->skipsblanks | key->reverse
2420	       | key->skipeblanks | key->month | key->numeric
2421	       | key->general_numeric)))
2422      {
2423	key->ignore = gkey.ignore;
2424	key->translate = gkey.translate;
2425	key->skipsblanks = gkey.skipsblanks;
2426	key->skipeblanks = gkey.skipeblanks;
2427	key->month = gkey.month;
2428	key->numeric = gkey.numeric;
2429	key->general_numeric = gkey.general_numeric;
2430	key->reverse = gkey.reverse;
2431      }
2432
2433  if (!keylist && (gkey.ignore || gkey.translate
2434		   || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
2435		       | gkey.numeric | gkey.general_numeric)))
2436    insertkey (&gkey);
2437  reverse = gkey.reverse;
2438
2439  if (temp_dir_count == 0)
2440    {
2441      char const *tmp_dir = getenv ("TMPDIR");
2442      add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
2443    }
2444
2445  if (nfiles == 0)
2446    {
2447      nfiles = 1;
2448      files = &minus;
2449    }
2450
2451  if (checkonly)
2452    {
2453      if (nfiles > 1)
2454	{
2455	  error (0, 0, _("extra operand %s not allowed with -c"),
2456		 quote (files[1]));
2457	  usage (SORT_FAILURE);
2458	}
2459
2460      /* POSIX requires that sort return 1 IFF invoked with -c and the
2461	 input is not properly sorted.  */
2462      exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
2463    }
2464
2465  if (mergeonly)
2466    merge (files, 0, nfiles, outfile);
2467  else
2468    sort (files, nfiles, outfile);
2469
2470  if (have_read_stdin && fclose (stdin) == EOF)
2471    die (_("close failed"), "-");
2472
2473  exit (EXIT_SUCCESS);
2474}
2475