1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include <sys/stat.h>
34#include <sys/sysctl.h>
35#include <sys/types.h>
36
37#include <err.h>
38#include <errno.h>
39#include <getopt.h>
40#include <limits.h>
41#include <locale.h>
42#include <md5.h>
43#include <regex.h>
44#include <signal.h>
45#include <stdbool.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <unistd.h>
50#include <wchar.h>
51#include <wctype.h>
52
53#include "coll.h"
54#include "file.h"
55#include "sort.h"
56
57#ifndef WITHOUT_NLS
58#include <nl_types.h>
59nl_catd catalog;
60#endif
61
/*
 * Short-option specification handed to getopt_long() in main().
 * A letter followed by ':' takes an argument.
 */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/* File used as the seed source unless --random-source overrides it. */
#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
/* Maximum number of bytes read from the default seed file. */
#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)

/* Set by set_sort_modifier() when the 'R' (random) modifier is seen. */
static bool need_random;
/* Path the random seed is derived from; replaced by --random-source. */
static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
/* Seed bytes (an MD5 digest string) and their length; see set_random_seed(). */
static const void *random_seed;
static size_t random_seed_size;

/* MD5 context primed with the random seed by set_random_seed(). */
MD5_CTX md5_ctx;
73
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.
 *
 * The table is indexed by message number; slot 0 is an empty string.
 * The numbers in the leading comments are the values passed to getstr()
 * elsewhere in this file (for instance 12 is the usage text, which takes
 * the program name as its single %s argument).
 * NOTE(review): getstr() is defined outside this file; presumably it
 * falls back to this table -- confirm against its definition.
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
101
/* Global (non-key) sort options; zeroed in set_sort_opts(), filled in main(). */
struct sort_opts sort_opts_vals;

/* Set by --debug; makes main() print diagnostic information. */
bool debug_sort;
/* Set when a g, M, n or h modifier is selected (see set_sort_modifier()). */
bool need_hint;

#if defined(SORT_THREADS)
/* Number of CPUs detected by set_hw_params(). */
unsigned int ncpu = 1;
/* Number of threads to use; defaults to ncpu, overridden by --parallel. */
size_t nthreads = 1;
#endif

/* Set when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as plain command-line options, i.e. not attached to a key. */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set by the n and h modifiers; makes --debug print the numeric symbols. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until the first name has been read.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
124
/*
 * Placeholder symbols for options which have no single-character equivalent.
 *
 * Numbering starts at CHAR_MAX + 1 so that none of these values can
 * coincide with a short-option character returned by getopt_long().
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
147
/*
 * Option characters of which at most one may be selected.  The position of
 * a character in this table is also its index in the caller-supplied
 * mef_flags array used by check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
150
/*
 * Long-option table for getopt_long().  Each entry maps a long name either
 * to the equivalent short-option character or to one of the *_OPT
 * placeholder values; the all-zero entry terminates the table.
 */
static struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				/*
				 * NOTE(review): the literal option name below contains
				 * "=silent|quiet"; it looks like documentation text used
				 * as a name -- confirm whether this entry is reachable.
				 */
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};
192
/*
 * Rewrites obsolete "+POS1 [-POS2]" arguments in argv into "-k" form;
 * declared without `static`, so it has external linkage.
 */
void fix_obsolete_keys(int *argc, char **argv);
194
195/*
196 * Check where sort modifier is present
197 */
198static bool
199sort_modifier_empty(struct sort_mods *sm)
200{
201
202	if (sm == NULL)
203		return (true);
204	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
205	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
206}
207
/*
 * Print the usage text and terminate the program.
 *
 * When opt_err is true the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{

	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
223
224/*
225 * Read input file names from a file (file0-from option).
226 */
227static void
228read_fns_from_file0(const char *fn)
229{
230	FILE *f;
231	char *line = NULL;
232	size_t linesize = 0;
233	ssize_t linelen;
234
235	if (fn == NULL)
236		return;
237
238	f = fopen(fn, "r");
239	if (f == NULL)
240		err(2, "%s", fn);
241
242	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
243		if (*line != '\0') {
244			if (argc_from_file0 == (size_t) - 1)
245				argc_from_file0 = 0;
246			++argc_from_file0;
247			argv_from_file0 = sort_realloc(argv_from_file0,
248			    argc_from_file0 * sizeof(char *));
249			if (argv_from_file0 == NULL)
250				err(2, NULL);
251			argv_from_file0[argc_from_file0 - 1] = line;
252		} else {
253			free(line);
254		}
255		line = NULL;
256		linesize = 0;
257	}
258	if (ferror(f))
259		err(2, "%s: getdelim", fn);
260
261	closefile(f, fn);
262}
263
264/*
265 * Check how much RAM is available for the sort.
266 */
267static void
268set_hw_params(void)
269{
270	long pages, psize;
271
272#if defined(SORT_THREADS)
273	ncpu = 1;
274#endif
275
276	pages = sysconf(_SC_PHYS_PAGES);
277	if (pages < 1) {
278		perror("sysconf pages");
279		pages = 1;
280	}
281	psize = sysconf(_SC_PAGESIZE);
282	if (psize < 1) {
283		perror("sysconf psize");
284		psize = 4096;
285	}
286#if defined(SORT_THREADS)
287	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
288	if (ncpu < 1)
289		ncpu = 1;
290	else if(ncpu > 32)
291		ncpu = 32;
292
293	nthreads = ncpu;
294#endif
295
296	free_memory = (unsigned long long) pages * (unsigned long long) psize;
297	available_free_memory = free_memory / 2;
298
299	if (available_free_memory < 1024)
300		available_free_memory = 1024;
301}
302
/*
 * Convert the first multibyte character of c into a wide character.
 *
 * The result is stored in *wc.  When mbtowc() reports an error or an
 * empty string (a return value below 1), *wc is set to def instead.
 * Nothing happens if either wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
318
319/*
320 * Set current locale symbols.
321 */
322static void
323set_locale(void)
324{
325	struct lconv *lc;
326	const char *locale;
327
328	setlocale(LC_ALL, "");
329
330	lc = localeconv();
331
332	if (lc) {
333		/* obtain LC_NUMERIC info */
334		/* Convert to wide char form */
335		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
336		    symbol_decimal_point);
337		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
338		    symbol_thousands_sep);
339		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
340		    symbol_positive_sign);
341		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
342		    symbol_negative_sign);
343	}
344
345	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
346		gnusort_numeric_compatibility = true;
347
348	locale = setlocale(LC_COLLATE, NULL);
349
350	if (locale) {
351		char *tmpl;
352		const char *cclocale;
353
354		tmpl = sort_strdup(locale);
355		cclocale = setlocale(LC_COLLATE, "C");
356		if (cclocale && !strcmp(cclocale, tmpl))
357			byte_sort = true;
358		else {
359			const char *pclocale;
360
361			pclocale = setlocale(LC_COLLATE, "POSIX");
362			if (pclocale && !strcmp(pclocale, tmpl))
363				byte_sort = true;
364		}
365		setlocale(LC_COLLATE, tmpl);
366		sort_free(tmpl);
367	}
368}
369
370/*
371 * Set directory temporary files.
372 */
373static void
374set_tmpdir(void)
375{
376	char *td;
377
378	td = getenv("TMPDIR");
379	if (td != NULL)
380		tmpdir = sort_strdup(td);
381}
382
383/*
384 * Parse -S option.
385 */
386static unsigned long long
387parse_memory_buffer_value(const char *value)
388{
389
390	if (value == NULL)
391		return (available_free_memory);
392	else {
393		char *endptr;
394		unsigned long long membuf;
395
396		endptr = NULL;
397		errno = 0;
398		membuf = strtoll(value, &endptr, 10);
399
400		if (errno != 0) {
401			warn("%s",getstr(4));
402			membuf = available_free_memory;
403		} else {
404			switch (*endptr){
405			case 'Y':
406				membuf *= 1024;
407				/* FALLTHROUGH */
408			case 'Z':
409				membuf *= 1024;
410				/* FALLTHROUGH */
411			case 'E':
412				membuf *= 1024;
413				/* FALLTHROUGH */
414			case 'P':
415				membuf *= 1024;
416				/* FALLTHROUGH */
417			case 'T':
418				membuf *= 1024;
419				/* FALLTHROUGH */
420			case 'G':
421				membuf *= 1024;
422				/* FALLTHROUGH */
423			case 'M':
424				membuf *= 1024;
425				/* FALLTHROUGH */
426			case '\0':
427			case 'K':
428				membuf *= 1024;
429				/* FALLTHROUGH */
430			case 'b':
431				break;
432			case '%':
433				membuf = (available_free_memory * membuf) /
434				    100;
435				break;
436			default:
437				warnc(EINVAL, "%s", optarg);
438				membuf = available_free_memory;
439			}
440		}
441		return (membuf);
442	}
443}
444
/*
 * Signal handler that clears the temporary files.
 *
 * Installed by set_signal_handler() with SA_SIGINFO, hence the
 * three-argument signature; none of the arguments is used.  After the
 * clean-up the process terminates with exit status -1.
 *
 * NOTE(review): exit() is not async-signal-safe, and clear_tmp_files()
 * (defined elsewhere) presumably is not either -- confirm whether this is
 * acceptable for the signals being handled.
 */
static void
sig_handler(int sig __unused, siginfo_t *siginfo __unused,
    void *context __unused)
{

	clear_tmp_files();
	exit(-1);
}
456
457/*
458 * Set signal handler on panic signals.
459 */
460static void
461set_signal_handler(void)
462{
463	struct sigaction sa;
464
465	memset(&sa, 0, sizeof(sa));
466	sa.sa_sigaction = &sig_handler;
467	sa.sa_flags = SA_SIGINFO;
468
469	if (sigaction(SIGTERM, &sa, NULL) < 0) {
470		perror("sigaction");
471		return;
472	}
473	if (sigaction(SIGHUP, &sa, NULL) < 0) {
474		perror("sigaction");
475		return;
476	}
477	if (sigaction(SIGINT, &sa, NULL) < 0) {
478		perror("sigaction");
479		return;
480	}
481	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
482		perror("sigaction");
483		return;
484	}
485	if (sigaction(SIGABRT, &sa, NULL) < 0) {
486		perror("sigaction");
487		return;
488	}
489	if (sigaction(SIGBUS, &sa, NULL) < 0) {
490		perror("sigaction");
491		return;
492	}
493	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
494		perror("sigaction");
495		return;
496	}
497	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
498		perror("sigaction");
499		return;
500	}
501	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
502		perror("sigaction");
503		return;
504	}
505}
506
/*
 * Print "unknown" message and exit with status 2.
 *
 * what is the unrecognised text; it is printed after message number 3
 * ("Unknown feature").  errx() does not return.
 */
static void
unknown(const char *what)
{

	errx(2, "%s: %s", getstr(3), what);
}
516
517/*
518 * Check whether contradictory input options are used.
519 */
520static void
521check_mutually_exclusive_flags(char c, bool *mef_flags)
522{
523	int fo_index, mec;
524	bool found_others, found_this;
525
526	found_others = found_this = false;
527	fo_index = 0;
528
529	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
530		mec = mutually_exclusive_flags[i];
531
532		if (mec != c) {
533			if (mef_flags[i]) {
534				if (found_this)
535					errx(1, "%c:%c: %s", c, mec, getstr(1));
536				found_others = true;
537				fo_index = i;
538			}
539		} else {
540			if (found_others)
541				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
542			mef_flags[i] = true;
543			found_this = true;
544		}
545	}
546}
547
548/*
549 * Initialise sort opts data.
550 */
551static void
552set_sort_opts(void)
553{
554
555	memset(&default_sort_mods_object, 0,
556	    sizeof(default_sort_mods_object));
557	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
558	default_sort_mods_object.func =
559	    get_sort_func(&default_sort_mods_object);
560}
561
562/*
563 * Set a sort modifier on a sort modifiers object.
564 */
565static bool
566set_sort_modifier(struct sort_mods *sm, int c)
567{
568
569	if (sm) {
570		switch (c){
571		case 'b':
572			sm->bflag = true;
573			break;
574		case 'd':
575			sm->dflag = true;
576			break;
577		case 'f':
578			sm->fflag = true;
579			break;
580		case 'g':
581			sm->gflag = true;
582			need_hint = true;
583			break;
584		case 'i':
585			sm->iflag = true;
586			break;
587		case 'R':
588			sm->Rflag = true;
589			need_random = true;
590			break;
591		case 'M':
592			initialise_months();
593			sm->Mflag = true;
594			need_hint = true;
595			break;
596		case 'n':
597			sm->nflag = true;
598			need_hint = true;
599			print_symbols_on_debug = true;
600			break;
601		case 'r':
602			sm->rflag = true;
603			break;
604		case 'V':
605			sm->Vflag = true;
606			break;
607		case 'h':
608			sm->hflag = true;
609			need_hint = true;
610			print_symbols_on_debug = true;
611			break;
612		default:
613			return false;
614		}
615		sort_opts_vals.complex_sort = true;
616		sm->func = get_sort_func(sm);
617	}
618	return (true);
619}
620
621/*
622 * Parse POS in -k option.
623 */
624static int
625parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
626{
627	regmatch_t pmatch[4];
628	regex_t re;
629	char *c, *f;
630	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
631	size_t len, nmatch;
632	int ret;
633
634	ret = -1;
635	nmatch = 4;
636	c = f = NULL;
637
638	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
639		return (-1);
640
641	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
642		goto end;
643
644	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
645		goto end;
646
647	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
648		goto end;
649
650	len = pmatch[1].rm_eo - pmatch[1].rm_so;
651	f = sort_malloc((len + 1) * sizeof(char));
652
653	strncpy(f, s + pmatch[1].rm_so, len);
654	f[len] = '\0';
655
656	if (second) {
657		errno = 0;
658		ks->f2 = (size_t) strtoul(f, NULL, 10);
659		if (errno != 0)
660			err(2, "-k");
661		if (ks->f2 == 0) {
662			warn("%s",getstr(5));
663			goto end;
664		}
665	} else {
666		errno = 0;
667		ks->f1 = (size_t) strtoul(f, NULL, 10);
668		if (errno != 0)
669			err(2, "-k");
670		if (ks->f1 == 0) {
671			warn("%s",getstr(5));
672			goto end;
673		}
674	}
675
676	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
677		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
678		c = sort_malloc((len + 1) * sizeof(char));
679
680		strncpy(c, s + pmatch[2].rm_so + 1, len);
681		c[len] = '\0';
682
683		if (second) {
684			errno = 0;
685			ks->c2 = (size_t) strtoul(c, NULL, 10);
686			if (errno != 0)
687				err(2, "-k");
688		} else {
689			errno = 0;
690			ks->c1 = (size_t) strtoul(c, NULL, 10);
691			if (errno != 0)
692				err(2, "-k");
693			if (ks->c1 == 0) {
694				warn("%s",getstr(6));
695				goto end;
696			}
697		}
698	} else {
699		if (second)
700			ks->c2 = 0;
701		else
702			ks->c1 = 1;
703	}
704
705	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
706		regoff_t i = 0;
707
708		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
709			check_mutually_exclusive_flags(s[i], mef_flags);
710			if (s[i] == 'b') {
711				if (second)
712					ks->pos2b = true;
713				else
714					ks->pos1b = true;
715			} else if (!set_sort_modifier(&(ks->sm), s[i]))
716				goto end;
717		}
718	}
719
720	ret = 0;
721
722end:
723
724	if (c)
725		sort_free(c);
726	if (f)
727		sort_free(f);
728	regfree(&re);
729
730	return (ret);
731}
732
733/*
734 * Parse -k option value.
735 */
736static int
737parse_k(const char *s, struct key_specs *ks)
738{
739	int ret = -1;
740	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
741	    { false, false, false, false, false, false };
742
743	if (s && *s) {
744		char *sptr;
745
746		sptr = strchr(s, ',');
747		if (sptr) {
748			size_t size1;
749			char *pos1, *pos2;
750
751			size1 = sptr - s;
752
753			if (size1 < 1)
754				return (-1);
755			pos1 = sort_malloc((size1 + 1) * sizeof(char));
756
757			strncpy(pos1, s, size1);
758			pos1[size1] = '\0';
759
760			ret = parse_pos(pos1, ks, mef_flags, false);
761
762			sort_free(pos1);
763			if (ret < 0)
764				return (ret);
765
766			pos2 = sort_strdup(sptr + 1);
767			ret = parse_pos(pos2, ks, mef_flags, true);
768			sort_free(pos2);
769		} else
770			ret = parse_pos(s, ks, mef_flags, false);
771	}
772
773	return (ret);
774}
775
/*
 * Size of the modifier-letter buffers exchanged between parse_pos_obs()
 * and fix_obsolete_keys(), terminating NUL included.
 */
#define	OBSOLETE_SOPTS_SIZE	128

/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  The field and column numbers
 * are stored in *nf and *nc (both 0 when absent) and the trailing letters
 * are copied to sopts, which must have room for OBSOLETE_SOPTS_SIZE
 * bytes.  sopts is left untouched when there are no letters.
 *
 * Returns 0 on success and -1 when s does not match or carries more
 * letters than sopts can hold.  A number that does not fit an int, with
 * room for the caller's "+ 1" adjustment, terminates the program with
 * status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret;

	ret = -1;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The pattern guarantees digits at this offset and strtoul() stops
	 * at the first non-digit, so no temporary copy is needed.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* The "+ 1" skips the '.' that introduces the column. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = (size_t)(pmatch[3].rm_eo - pmatch[3].rm_so);

		/* Never write past the caller's buffer. */
		if (len >= OBSOLETE_SOPTS_SIZE)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * Every argument of the form "+POS1" is replaced by a newly allocated
 * "-kF.C[letters]" string in which field and column are one higher than
 * in the obsolete form.  If the following argument has the form "-POS2"
 * it is folded into the same key ("-kF1.C1...,F2.C2...") and removed from
 * argv, and *argc is decremented.  Arguments that do not parse are left
 * alone.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Enough for "-k", four ints with their separators and two full
	 * modifier strings; snprintf() bounds the write in any case.
	 */
	char sopt[2 * OBSOLETE_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char sopts1[OBSOLETE_SOPTS_SIZE];
		char *arg1;
		int c1, f1;

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		/* Obsolete positions count from 0, -k positions from 1. */
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char sopts2[OBSOLETE_SOPTS_SIZE];
			char *arg2 = argv[i + 1];
			int c2, f2;

			sopts2[0] = '\0';
			c2 = f2 = 0;

			if (strlen(arg2) > 1 && arg2[0] == '-' &&
			    parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) {
				if (c2 > 0)
					f2 += 1;
				snprintf(sopt, sizeof(sopt),
				    "-k%d.%d%s,%d.%d%s",
				    f1, c1, sopts1, f2, c2, sopts2);
				argv[i] = sort_strdup(sopt);

				/* Close the gap left by the consumed -POS2. */
				for (int j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				/* Keep the vector NULL-terminated. */
				argv[*argc] = NULL;
				continue;
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
906
907/*
908 * Set random seed
909 */
910static void
911set_random_seed(void)
912{
913	if (need_random) {
914
915		if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
916			FILE* fseed;
917			MD5_CTX ctx;
918			char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
919			size_t sz = 0;
920
921			fseed = openfile(random_source, "r");
922			while (!feof(fseed)) {
923				int cr;
924
925				cr = fgetc(fseed);
926				if (cr == EOF)
927					break;
928
929				rsd[sz++] = (char) cr;
930
931				if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
932					break;
933			}
934
935			closefile(fseed, random_source);
936
937			MD5Init(&ctx);
938			MD5Update(&ctx, rsd, sz);
939
940			random_seed = MD5End(&ctx, NULL);
941			random_seed_size = strlen(random_seed);
942
943		} else {
944			MD5_CTX ctx;
945			char *b;
946
947			MD5Init(&ctx);
948			b = MD5File(random_source, NULL);
949			if (b == NULL)
950				err(2, NULL);
951
952			random_seed = b;
953			random_seed_size = strlen(b);
954		}
955
956		MD5Init(&md5_ctx);
957		if(random_seed_size>0) {
958			MD5Update(&md5_ctx, random_seed, random_seed_size);
959		}
960	}
961}
962
963/*
964 * Main function.
965 */
966int
967main(int argc, char **argv)
968{
969	char *outfile, *real_outfile;
970	int c, result;
971	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
972	    { false, false, false, false, false, false };
973
974	result = 0;
975	outfile = sort_strdup("-");
976	real_outfile = NULL;
977
978	struct sort_mods *sm = &default_sort_mods_object;
979
980	init_tmp_files();
981
982	set_signal_handler();
983
984	set_hw_params();
985	set_locale();
986	set_tmpdir();
987	set_sort_opts();
988
989	fix_obsolete_keys(&argc, argv);
990
991	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
992	    != -1)) {
993
994		check_mutually_exclusive_flags(c, mef_flags);
995
996		if (!set_sort_modifier(sm, c)) {
997
998			switch (c) {
999			case 'c':
1000				sort_opts_vals.cflag = true;
1001				if (optarg) {
1002					if (!strcmp(optarg, "diagnose-first"))
1003						;
1004					else if (!strcmp(optarg, "silent") ||
1005					    !strcmp(optarg, "quiet"))
1006						sort_opts_vals.csilentflag = true;
1007					else if (*optarg)
1008						unknown(optarg);
1009				}
1010				break;
1011			case 'C':
1012				sort_opts_vals.cflag = true;
1013				sort_opts_vals.csilentflag = true;
1014				break;
1015			case 'k':
1016			{
1017				sort_opts_vals.complex_sort = true;
1018				sort_opts_vals.kflag = true;
1019
1020				keys_num++;
1021				keys = sort_realloc(keys, keys_num *
1022				    sizeof(struct key_specs));
1023				memset(&(keys[keys_num - 1]), 0,
1024				    sizeof(struct key_specs));
1025
1026				if (parse_k(optarg, &(keys[keys_num - 1]))
1027				    < 0) {
1028					errc(2, EINVAL, "-k %s", optarg);
1029				}
1030
1031				break;
1032			}
1033			case 'm':
1034				sort_opts_vals.mflag = true;
1035				break;
1036			case 'o':
1037				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1038				strcpy(outfile, optarg);
1039				break;
1040			case 's':
1041				sort_opts_vals.sflag = true;
1042				break;
1043			case 'S':
1044				available_free_memory =
1045				    parse_memory_buffer_value(optarg);
1046				break;
1047			case 'T':
1048				tmpdir = sort_strdup(optarg);
1049				break;
1050			case 't':
1051				while (strlen(optarg) > 1) {
1052					if (optarg[0] != '\\') {
1053						errc(2, EINVAL, "%s", optarg);
1054					}
1055					optarg += 1;
1056					if (*optarg == '0') {
1057						*optarg = 0;
1058						break;
1059					}
1060				}
1061				sort_opts_vals.tflag = true;
1062				sort_opts_vals.field_sep = btowc(optarg[0]);
1063				if (sort_opts_vals.field_sep == WEOF) {
1064					errno = EINVAL;
1065					err(2, NULL);
1066				}
1067				if (!gnusort_numeric_compatibility) {
1068					if (symbol_decimal_point == sort_opts_vals.field_sep)
1069						symbol_decimal_point = WEOF;
1070					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1071						symbol_thousands_sep = WEOF;
1072					if (symbol_negative_sign == sort_opts_vals.field_sep)
1073						symbol_negative_sign = WEOF;
1074					if (symbol_positive_sign == sort_opts_vals.field_sep)
1075						symbol_positive_sign = WEOF;
1076				}
1077				break;
1078			case 'u':
1079				sort_opts_vals.uflag = true;
1080				/* stable sort for the correct unique val */
1081				sort_opts_vals.sflag = true;
1082				break;
1083			case 'z':
1084				sort_opts_vals.zflag = true;
1085				break;
1086			case SORT_OPT:
1087				if (optarg) {
1088					if (!strcmp(optarg, "general-numeric"))
1089						set_sort_modifier(sm, 'g');
1090					else if (!strcmp(optarg, "human-numeric"))
1091						set_sort_modifier(sm, 'h');
1092					else if (!strcmp(optarg, "numeric"))
1093						set_sort_modifier(sm, 'n');
1094					else if (!strcmp(optarg, "month"))
1095						set_sort_modifier(sm, 'M');
1096					else if (!strcmp(optarg, "random"))
1097						set_sort_modifier(sm, 'R');
1098					else
1099						unknown(optarg);
1100				}
1101				break;
1102#if defined(SORT_THREADS)
1103			case PARALLEL_OPT:
1104				nthreads = (size_t)(atoi(optarg));
1105				if (nthreads < 1)
1106					nthreads = 1;
1107				if (nthreads > 1024)
1108					nthreads = 1024;
1109				break;
1110#endif
1111			case QSORT_OPT:
1112				sort_opts_vals.sort_method = SORT_QSORT;
1113				break;
1114			case MERGESORT_OPT:
1115				sort_opts_vals.sort_method = SORT_MERGESORT;
1116				break;
1117			case MMAP_OPT:
1118				use_mmap = true;
1119				break;
1120			case HEAPSORT_OPT:
1121				sort_opts_vals.sort_method = SORT_HEAPSORT;
1122				break;
1123			case RADIXSORT_OPT:
1124				sort_opts_vals.sort_method = SORT_RADIXSORT;
1125				break;
1126			case RANDOMSOURCE_OPT:
1127				random_source = strdup(optarg);
1128				break;
1129			case COMPRESSPROGRAM_OPT:
1130				compress_program = strdup(optarg);
1131				break;
1132			case FF_OPT:
1133				read_fns_from_file0(optarg);
1134				break;
1135			case BS_OPT:
1136			{
1137				errno = 0;
1138				long mof = strtol(optarg, NULL, 10);
1139				if (errno != 0)
1140					err(2, "--batch-size");
1141				if (mof >= 2)
1142					max_open_files = (size_t) mof + 1;
1143			}
1144				break;
1145			case VERSION_OPT:
1146				printf("%s\n", VERSION);
1147				exit(EXIT_SUCCESS);
1148				/* NOTREACHED */
1149				break;
1150			case DEBUG_OPT:
1151				debug_sort = true;
1152				break;
1153			case HELP_OPT:
1154				usage(false);
1155				/* NOTREACHED */
1156				break;
1157			default:
1158				usage(true);
1159				/* NOTREACHED */
1160			}
1161		}
1162	}
1163
1164	argc -= optind;
1165	argv += optind;
1166
1167	if (argv_from_file0) {
1168		argc = argc_from_file0;
1169		argv = argv_from_file0;
1170	}
1171
1172#ifndef WITHOUT_NLS
1173	catalog = catopen("sort", NL_CAT_LOCALE);
1174#endif
1175
1176	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1177		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1178
1179#ifndef WITHOUT_NLS
1180	catclose(catalog);
1181#endif
1182
1183	if (keys_num == 0) {
1184		keys_num = 1;
1185		keys = sort_realloc(keys, sizeof(struct key_specs));
1186		memset(&(keys[0]), 0, sizeof(struct key_specs));
1187		keys[0].c1 = 1;
1188		keys[0].pos1b = default_sort_mods->bflag;
1189		keys[0].pos2b = default_sort_mods->bflag;
1190		memcpy(&(keys[0].sm), default_sort_mods,
1191		    sizeof(struct sort_mods));
1192	}
1193
1194	for (size_t i = 0; i < keys_num; i++) {
1195		struct key_specs *ks;
1196
1197		ks = &(keys[i]);
1198
1199		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1200		    !(ks->pos2b)) {
1201			ks->pos1b = sm->bflag;
1202			ks->pos2b = sm->bflag;
1203			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1204		}
1205
1206		ks->sm.func = get_sort_func(&(ks->sm));
1207	}
1208
1209	if (debug_sort) {
1210		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1211#if defined(SORT_THREADS)
1212		printf("Number of CPUs: %d\n",(int)ncpu);
1213		nthreads = 1;
1214#endif
1215		printf("Using collate rules of %s locale\n",
1216		    setlocale(LC_COLLATE, NULL));
1217		if (byte_sort)
1218			printf("Byte sort is used\n");
1219		if (print_symbols_on_debug) {
1220			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1221			if (symbol_thousands_sep)
1222				printf("Thousands separator: <%lc>\n",
1223				    symbol_thousands_sep);
1224			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1225			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1226		}
1227	}
1228
1229	set_random_seed();
1230
1231	/* Case when the outfile equals one of the input files: */
1232	if (strcmp(outfile, "-")) {
1233
1234		for(int i = 0; i < argc; ++i) {
1235			if (strcmp(argv[i], outfile) == 0) {
1236				real_outfile = sort_strdup(outfile);
1237				for(;;) {
1238					char* tmp = sort_malloc(strlen(outfile) +
1239					    strlen(".tmp") + 1);
1240
1241					strcpy(tmp, outfile);
1242					strcpy(tmp + strlen(tmp), ".tmp");
1243					sort_free(outfile);
1244					outfile = tmp;
1245					if (access(outfile, F_OK) < 0)
1246						break;
1247				}
1248				tmp_file_atexit(outfile);
1249			}
1250		}
1251	}
1252
1253#if defined(SORT_THREADS)
1254	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1255		nthreads = 1;
1256#endif
1257
1258	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1259		struct file_list fl;
1260		struct sort_list list;
1261
1262		sort_list_init(&list);
1263		file_list_init(&fl, true);
1264
1265		if (argc < 1)
1266			procfile("-", &list, &fl);
1267		else {
1268			while (argc > 0) {
1269				procfile(*argv, &list, &fl);
1270				--argc;
1271				++argv;
1272			}
1273		}
1274
1275		if (fl.count < 1)
1276			sort_list_to_file(&list, outfile);
1277		else {
1278			if (list.count > 0) {
1279				char *flast = new_tmp_file_name();
1280
1281				sort_list_to_file(&list, flast);
1282				file_list_add(&fl, flast, false);
1283			}
1284			merge_files(&fl, outfile);
1285		}
1286
1287		file_list_clean(&fl);
1288
1289		/*
1290		 * We are about to exit the program, so we can ignore
1291		 * the clean-up for speed
1292		 *
1293		 * sort_list_clean(&list);
1294		 */
1295
1296	} else if (sort_opts_vals.cflag) {
1297		result = (argc == 0) ? (check("-")) : (check(*argv));
1298	} else if (sort_opts_vals.mflag) {
1299		struct file_list fl;
1300
1301		file_list_init(&fl, false);
1302		/* No file arguments remaining means "read from stdin." */
1303		if (argc == 0)
1304			file_list_add(&fl, "-", true);
1305		else
1306			file_list_populate(&fl, argc, argv, true);
1307		merge_files(&fl, outfile);
1308		file_list_clean(&fl);
1309	}
1310
1311	if (real_outfile) {
1312		unlink(real_outfile);
1313		if (rename(outfile, real_outfile) < 0)
1314			err(2, NULL);
1315		sort_free(real_outfile);
1316	}
1317
1318	sort_free(outfile);
1319
1320	return (result);
1321}
1322