1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/11/usr.bin/sort/coll.c 330449 2018-03-05 07:26:05Z eadler $");
32
33#include <sys/types.h>
34
35#include <errno.h>
36#include <err.h>
37#include <langinfo.h>
38#include <limits.h>
39#include <math.h>
40#include <md5.h>
41#include <stdlib.h>
42#include <string.h>
43#include <wchar.h>
44#include <wctype.h>
45
46#include "coll.h"
47#include "vsort.h"
48
49struct key_specs *keys;
50size_t keys_num = 0;
51
52wint_t symbol_decimal_point = L'.';
53/* there is no default thousands separator in collate rules: */
54wint_t symbol_thousands_sep = 0;
55wint_t symbol_negative_sign = L'-';
56wint_t symbol_positive_sign = L'+';
57
58static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61static int numcoll(struct key_value*, struct key_value *, size_t offset);
62static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63static int randomcoll(struct key_value*, struct key_value *, size_t offset);
64static int versioncoll(struct key_value*, struct key_value *, size_t offset);
65
66/*
67 * Allocate keys array
68 */
69struct keys_array *
70keys_array_alloc(void)
71{
72	struct keys_array *ka;
73	size_t sz;
74
75	sz = keys_array_size();
76	ka = sort_malloc(sz);
77	memset(ka, 0, sz);
78
79	return (ka);
80}
81
82/*
83 * Calculate whether we need key hint space
84 */
85static size_t
86key_hint_size(void)
87{
88
89	return (need_hint ? sizeof(struct key_hint) : 0);
90}
91
92/*
93 * Calculate keys array size
94 */
95size_t
96keys_array_size(void)
97{
98
99	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
100}
101
102/*
103 * Clean data of keys array
104 */
105void
106clean_keys_array(const struct bwstring *s, struct keys_array *ka)
107{
108
109	if (ka) {
110		for (size_t i = 0; i < keys_num; ++i) {
111			const struct key_value *kv;
112
113			kv = get_key_from_keys_array(ka, i);
114			if (kv->k && kv->k != s)
115				bwsfree(kv->k);
116		}
117		memset(ka, 0, keys_array_size());
118	}
119}
120
121/*
122 * Get pointer to a key value in the keys set
123 */
124struct key_value *
125get_key_from_keys_array(struct keys_array *ka, size_t ind)
126{
127
128	return ((struct key_value *)((caddr_t)ka->key +
129	    ind * (sizeof(struct key_value) + key_hint_size())));
130}
131
132/*
133 * Set value of a key in the keys set
134 */
135void
136set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
137{
138
139	if (ka && keys_num > ind) {
140		struct key_value *kv;
141
142		kv = get_key_from_keys_array(ka, ind);
143
144		if (kv->k && kv->k != s)
145			bwsfree(kv->k);
146		kv->k = s;
147	}
148}
149
150/*
151 * Initialize a sort list item
152 */
153struct sort_list_item *
154sort_list_item_alloc(void)
155{
156	struct sort_list_item *si;
157	size_t sz;
158
159	sz = sizeof(struct sort_list_item) + keys_array_size();
160	si = sort_malloc(sz);
161	memset(si, 0, sz);
162
163	return (si);
164}
165
166size_t
167sort_list_item_size(struct sort_list_item *si)
168{
169	size_t ret = 0;
170
171	if (si) {
172		ret = sizeof(struct sort_list_item) + keys_array_size();
173		if (si->str)
174			ret += bws_memsize(si->str);
175		for (size_t i = 0; i < keys_num; ++i) {
176			const struct key_value *kv;
177
178			kv = get_key_from_keys_array(&si->ka, i);
179
180			if (kv->k != si->str)
181				ret += bws_memsize(kv->k);
182		}
183	}
184	return (ret);
185}
186
187/*
188 * Calculate key for a sort list item
189 */
190static void
191sort_list_item_make_key(struct sort_list_item *si)
192{
193
194	preproc(si->str, &(si->ka));
195}
196
197/*
198 * Set value of a sort list item.
199 * Return combined string and keys memory size.
200 */
201void
202sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
203{
204
205	if (si) {
206		clean_keys_array(si->str, &(si->ka));
207		if (si->str) {
208			if (si->str == str) {
209				/* we are trying to reset the same string */
210				return;
211			} else {
212				bwsfree(si->str);
213				si->str = NULL;
214			}
215		}
216		si->str = str;
217		sort_list_item_make_key(si);
218	}
219}
220
221/*
222 * De-allocate a sort list item object memory
223 */
224void
225sort_list_item_clean(struct sort_list_item *si)
226{
227
228	if (si) {
229		clean_keys_array(si->str, &(si->ka));
230		if (si->str) {
231			bwsfree(si->str);
232			si->str = NULL;
233		}
234	}
235}
236
237/*
238 * Skip columns according to specs
239 */
240static size_t
241skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
242    bool skip_blanks, bool *empty_key)
243{
244	if (cols < 1)
245		return (BWSLEN(s) + 1);
246
247	if (skip_blanks)
248		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
249			++start;
250
251	while (start < BWSLEN(s) && cols > 1) {
252		--cols;
253		++start;
254	}
255
256	if (start >= BWSLEN(s))
257		*empty_key = true;
258
259	return (start);
260}
261
262/*
263 * Skip fields according to specs
264 */
265static size_t
266skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
267{
268
269	if (fields < 2) {
270		if (BWSLEN(s) == 0)
271			*empty_field = true;
272		return (0);
273	} else if (!(sort_opts_vals.tflag)) {
274		size_t cpos = 0;
275		bool pb = true;
276
277		while (cpos < BWSLEN(s)) {
278			bool isblank;
279
280			isblank = iswblank(BWS_GET(s, cpos));
281
282			if (isblank && !pb) {
283				--fields;
284				if (fields <= 1)
285					return (cpos);
286			}
287			pb = isblank;
288			++cpos;
289		}
290		if (fields > 1)
291			*empty_field = true;
292		return (cpos);
293	} else {
294		size_t cpos = 0;
295
296		while (cpos < BWSLEN(s)) {
297			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
298				--fields;
299				if (fields <= 1)
300					return (cpos + 1);
301			}
302			++cpos;
303		}
304		if (fields > 1)
305			*empty_field = true;
306		return (cpos);
307	}
308}
309
310/*
311 * Find fields start
312 */
313static void
314find_field_start(const struct bwstring *s, struct key_specs *ks,
315    size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
316{
317
318	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
319	if (!*empty_field)
320		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
321		    ks->pos1b, empty_key);
322	else
323		*empty_key = true;
324}
325
326/*
327 * Find end key position
328 */
329static size_t
330find_field_end(const struct bwstring *s, struct key_specs *ks)
331{
332	size_t f2, next_field_start, pos_end;
333	bool empty_field, empty_key;
334
335	pos_end = 0;
336	next_field_start = 0;
337	empty_field = false;
338	empty_key = false;
339	f2 = ks->f2;
340
341	if (f2 == 0)
342		return (BWSLEN(s) + 1);
343	else {
344		if (ks->c2 == 0) {
345			next_field_start = skip_fields_to_start(s, f2 + 1,
346			    &empty_field);
347			if ((next_field_start > 0) && sort_opts_vals.tflag &&
348			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
349			    next_field_start - 1)))
350				--next_field_start;
351		} else
352			next_field_start = skip_fields_to_start(s, f2,
353			    &empty_field);
354	}
355
356	if (empty_field || (next_field_start >= BWSLEN(s)))
357		return (BWSLEN(s) + 1);
358
359	if (ks->c2) {
360		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
361		    ks->pos2b, &empty_key);
362		if (pos_end < BWSLEN(s))
363			++pos_end;
364	} else
365		pos_end = next_field_start;
366
367	return (pos_end);
368}
369
370/*
371 * Cut a field according to the key specs
372 */
373static struct bwstring *
374cut_field(const struct bwstring *s, struct key_specs *ks)
375{
376	struct bwstring *ret = NULL;
377
378	if (s && ks) {
379		size_t field_start, key_end, key_start, sz;
380		bool empty_field, empty_key;
381
382		field_start = 0;
383		key_start = 0;
384		empty_field = false;
385		empty_key = false;
386
387		find_field_start(s, ks, &field_start, &key_start,
388		    &empty_field, &empty_key);
389
390		if (empty_key)
391			sz = 0;
392		else {
393			key_end = find_field_end(s, ks);
394			sz = (key_end < key_start) ? 0 : (key_end - key_start);
395		}
396
397		ret = bwsalloc(sz);
398		if (sz)
399			bwsnocpy(ret, s, key_start, sz);
400	} else
401		ret = bwsalloc(0);
402
403	return (ret);
404}
405
406/*
407 * Preprocesses a line applying the necessary transformations
408 * specified by command line options and returns the preprocessed
409 * string, which can be used to compare.
410 */
411int
412preproc(struct bwstring *s, struct keys_array *ka)
413{
414
415	if (sort_opts_vals.kflag)
416		for (size_t i = 0; i < keys_num; i++) {
417			struct bwstring *key;
418			struct key_specs *kspecs;
419			struct sort_mods *sm;
420
421			kspecs = &(keys[i]);
422			key = cut_field(s, kspecs);
423
424			sm = &(kspecs->sm);
425			if (sm->dflag)
426				key = dictionary_order(key);
427			else if (sm->iflag)
428				key = ignore_nonprinting(key);
429			if (sm->fflag || sm->Mflag)
430				key = ignore_case(key);
431
432			set_key_on_keys_array(ka, key, i);
433		}
434	else {
435		struct bwstring *ret = NULL;
436		struct sort_mods *sm = default_sort_mods;
437
438		if (sm->bflag) {
439			if (ret == NULL)
440				ret = bwsdup(s);
441			ret = ignore_leading_blanks(ret);
442		}
443		if (sm->dflag) {
444			if (ret == NULL)
445				ret = bwsdup(s);
446			ret = dictionary_order(ret);
447		} else if (sm->iflag) {
448			if (ret == NULL)
449				ret = bwsdup(s);
450			ret = ignore_nonprinting(ret);
451		}
452		if (sm->fflag || sm->Mflag) {
453			if (ret == NULL)
454				ret = bwsdup(s);
455			ret = ignore_case(ret);
456		}
457		if (ret == NULL)
458			set_key_on_keys_array(ka, s, 0);
459		else
460			set_key_on_keys_array(ka, ret, 0);
461	}
462
463	return 0;
464}
465
466cmpcoll_t
467get_sort_func(struct sort_mods *sm)
468{
469
470	if (sm->nflag)
471		return (numcoll);
472	else if (sm->hflag)
473		return (hnumcoll);
474	else if (sm->gflag)
475		return (gnumcoll);
476	else if (sm->Mflag)
477		return (monthcoll);
478	else if (sm->Rflag)
479		return (randomcoll);
480	else if (sm->Vflag)
481		return (versioncoll);
482	else
483		return (wstrcoll);
484}
485
486/*
487 * Compares the given strings.  Returns a positive number if
488 * the first precedes the second, a negative number if the second is
489 * the preceding one, and zero if they are equal.  This function calls
490 * the underlying collate functions, which done the actual comparison.
491 */
492int
493key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
494{
495	struct key_value *kv1, *kv2;
496	struct sort_mods *sm;
497	int res = 0;
498
499	for (size_t i = 0; i < keys_num; ++i) {
500		kv1 = get_key_from_keys_array(ps1, i);
501		kv2 = get_key_from_keys_array(ps2, i);
502		sm = &(keys[i].sm);
503
504		if (sm->rflag)
505			res = sm->func(kv2, kv1, offset);
506		else
507			res = sm->func(kv1, kv2, offset);
508
509		if (res)
510			break;
511
512		/* offset applies to only the first key */
513		offset = 0;
514	}
515	return (res);
516}
517
518/*
519 * Compare two strings.
520 * Plain symbol-by-symbol comparison.
521 */
522int
523top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
524{
525
526	if (default_sort_mods->rflag) {
527		const struct bwstring *tmp;
528
529		tmp = s1;
530		s1 = s2;
531		s2 = tmp;
532	}
533
534	return (bwscoll(s1, s2, 0));
535}
536
537/*
538 * Compare a string and a sort list item, according to the sort specs.
539 */
540int
541str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
542{
543	struct keys_array *ka1;
544	int ret = 0;
545
546	ka1 = keys_array_alloc();
547
548	preproc(str1, ka1);
549
550	sort_list_item_make_key(*ss2);
551
552	if (debug_sort) {
553		bwsprintf(stdout, str1, "; s1=<", ">");
554		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
555	}
556
557	ret = key_coll(ka1, &((*ss2)->ka), 0);
558
559	if (debug_sort)
560		printf("; cmp1=%d", ret);
561
562	clean_keys_array(str1, ka1);
563	sort_free(ka1);
564
565	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
566		ret = top_level_str_coll(str1, ((*ss2)->str));
567		if (debug_sort)
568			printf("; cmp2=%d", ret);
569	}
570
571	if (debug_sort)
572		printf("\n");
573
574	return (ret);
575}
576
577/*
578 * Compare two sort list items, according to the sort specs.
579 */
580int
581list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
582    size_t offset)
583{
584	int ret;
585
586	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
587
588	if (debug_sort) {
589		if (offset)
590			printf("; offset=%d", (int) offset);
591		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
592		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
593		printf("; cmp1=%d\n", ret);
594	}
595
596	if (ret)
597		return (ret);
598
599	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
600		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
601		if (debug_sort)
602			printf("; cmp2=%d\n", ret);
603	}
604
605	return (ret);
606}
607
608/*
609 * Compare two sort list items, according to the sort specs.
610 */
611int
612list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
613{
614
615	return (list_coll_offset(ss1, ss2, 0));
616}
617
618#define	LSCDEF(N)							\
619static int 								\
620list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
621{									\
622									\
623	return (list_coll_offset(ss1, ss2, N));				\
624}
625
626LSCDEF(1)
627LSCDEF(2)
628LSCDEF(3)
629LSCDEF(4)
630LSCDEF(5)
631LSCDEF(6)
632LSCDEF(7)
633LSCDEF(8)
634LSCDEF(9)
635LSCDEF(10)
636LSCDEF(11)
637LSCDEF(12)
638LSCDEF(13)
639LSCDEF(14)
640LSCDEF(15)
641LSCDEF(16)
642LSCDEF(17)
643LSCDEF(18)
644LSCDEF(19)
645LSCDEF(20)
646
647listcoll_t
648get_list_call_func(size_t offset)
649{
650	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
651	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
652	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
653	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
654	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
655	    list_coll_18, list_coll_19, list_coll_20 };
656
657	if (offset <= 20)
658		return (lsarray[offset]);
659
660	return (list_coll);
661}
662
663/*
664 * Compare two sort list items, only by their original string.
665 */
666int
667list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
668{
669
670	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
671}
672
673/*
674 * Maximum size of a number in the string (before or after decimal point)
675 */
676#define MAX_NUM_SIZE (128)
677
678/*
679 * Set suffix value
680 */
681static void setsuffix(wchar_t c, unsigned char *si)
682{
683	switch (c){
684	case L'k':
685	case L'K':
686		*si = 1;
687		break;
688	case L'M':
689		*si = 2;
690		break;
691	case L'G':
692		*si = 3;
693		break;
694	case L'T':
695		*si = 4;
696		break;
697	case L'P':
698		*si = 5;
699		break;
700	case L'E':
701		*si = 6;
702		break;
703	case L'Z':
704		*si = 7;
705		break;
706	case L'Y':
707		*si = 8;
708		break;
709	default:
710		*si = 0;
711	}
712}
713
714/*
715 * Read string s and parse the string into a fixed-decimal-point number.
716 * sign equals -1 if the number is negative (explicit plus is not allowed,
717 * according to GNU sort's "info sort".
718 * The number part before decimal point is in the smain, after the decimal
719 * point is in sfrac, tail is the pointer to the remainder of the string.
720 */
721static int
722read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
723{
724	bwstring_iterator s;
725
726	s = bws_begin(s0);
727
728	/* always end the fraction with zero, even if we have no fraction */
729	sfrac[0] = 0;
730
731	while (iswblank(bws_get_iter_value(s)))
732		s = bws_iterator_inc(s, 1);
733
734	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
735		*sign = -1;
736		s = bws_iterator_inc(s, 1);
737	}
738
739	// This is '0', not '\0', do not change this
740	while (iswdigit(bws_get_iter_value(s)) &&
741	    (bws_get_iter_value(s) == L'0'))
742		s = bws_iterator_inc(s, 1);
743
744	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
745		if (iswdigit(bws_get_iter_value(s))) {
746			smain[*main_len] = bws_get_iter_value(s);
747			s = bws_iterator_inc(s, 1);
748			*main_len += 1;
749		} else if (symbol_thousands_sep &&
750		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
751			s = bws_iterator_inc(s, 1);
752		else
753			break;
754	}
755
756	smain[*main_len] = 0;
757
758	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
759		s = bws_iterator_inc(s, 1);
760		while (iswdigit(bws_get_iter_value(s)) &&
761		    *frac_len < MAX_NUM_SIZE) {
762			sfrac[*frac_len] = bws_get_iter_value(s);
763			s = bws_iterator_inc(s, 1);
764			*frac_len += 1;
765		}
766		sfrac[*frac_len] = 0;
767
768		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
769			--(*frac_len);
770			sfrac[*frac_len] = L'\0';
771		}
772	}
773
774	setsuffix(bws_get_iter_value(s),si);
775
776	if ((*main_len + *frac_len) == 0)
777		*sign = 0;
778
779	return (0);
780}
781
782/*
783 * Implements string sort.
784 */
785static int
786wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
787{
788
789	if (debug_sort) {
790		if (offset)
791			printf("; offset=%d\n", (int) offset);
792		bwsprintf(stdout, kv1->k, "; k1=<", ">");
793		printf("(%zu)", BWSLEN(kv1->k));
794		bwsprintf(stdout, kv2->k, ", k2=<", ">");
795		printf("(%zu)", BWSLEN(kv2->k));
796	}
797
798	return (bwscoll(kv1->k, kv2->k, offset));
799}
800
801/*
802 * Compare two suffixes
803 */
804static inline int
805cmpsuffix(unsigned char si1, unsigned char si2)
806{
807
808	return ((char)si1 - (char)si2);
809}
810
811/*
812 * Implements numeric sort for -n and -h.
813 */
814static int
815numcoll_impl(struct key_value *kv1, struct key_value *kv2,
816    size_t offset __unused, bool use_suffix)
817{
818	struct bwstring *s1, *s2;
819	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
820	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
821	int cmp_res, sign1, sign2;
822	size_t frac1, frac2, main1, main2;
823	unsigned char SI1, SI2;
824	bool e1, e2, key1_read, key2_read;
825
826	s1 = kv1->k;
827	s2 = kv2->k;
828	sign1 = sign2 = 0;
829	main1 = main2 = 0;
830	frac1 = frac2 = 0;
831
832	cmp_res = 0;
833	key1_read = key2_read = false;
834
835	if (debug_sort) {
836		bwsprintf(stdout, s1, "; k1=<", ">");
837		bwsprintf(stdout, s2, ", k2=<", ">");
838	}
839
840	if (s1 == s2)
841		return (0);
842
843	if (kv1->hint->status == HS_UNINITIALIZED) {
844		/* read the number from the string */
845		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
846		key1_read = true;
847		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
848		if(main1 < 1 && frac1 < 1)
849			kv1->hint->v.nh.empty=true;
850		kv1->hint->v.nh.si = SI1;
851		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
852		    HS_INITIALIZED : HS_ERROR;
853		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
854	}
855
856	if (kv2->hint->status == HS_UNINITIALIZED) {
857		/* read the number from the string */
858		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
859		key2_read = true;
860		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
861		if(main2 < 1 && frac2 < 1)
862			kv2->hint->v.nh.empty=true;
863		kv2->hint->v.nh.si = SI2;
864		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
865		    HS_INITIALIZED : HS_ERROR;
866		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
867	}
868
869	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
870	    HS_INITIALIZED) {
871		unsigned long long n1, n2;
872		bool neg1, neg2;
873
874		e1 = kv1->hint->v.nh.empty;
875		e2 = kv2->hint->v.nh.empty;
876
877		if (e1 && e2)
878			return (0);
879
880		neg1 = kv1->hint->v.nh.neg;
881		neg2 = kv2->hint->v.nh.neg;
882
883		if (neg1 && !neg2)
884			return (-1);
885		if (neg2 && !neg1)
886			return (+1);
887
888		if (e1)
889			return (neg2 ? +1 : -1);
890		else if (e2)
891			return (neg1 ? -1 : +1);
892
893
894		if (use_suffix) {
895			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
896			if (cmp_res)
897				return (neg1 ? -cmp_res : cmp_res);
898		}
899
900		n1 = kv1->hint->v.nh.n1;
901		n2 = kv2->hint->v.nh.n1;
902		if (n1 < n2)
903			return (neg1 ? +1 : -1);
904		else if (n1 > n2)
905			return (neg1 ? -1 : +1);
906	}
907
908	/* read the numbers from the strings */
909	if (!key1_read)
910		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
911	if (!key2_read)
912		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
913
914	e1 = ((main1 + frac1) == 0);
915	e2 = ((main2 + frac2) == 0);
916
917	if (e1 && e2)
918		return (0);
919
920	/* we know the result if the signs are different */
921	if (sign1 < 0 && sign2 >= 0)
922		return (-1);
923	if (sign1 >= 0 && sign2 < 0)
924		return (+1);
925
926	if (e1)
927		return ((sign2 < 0) ? +1 : -1);
928	else if (e2)
929		return ((sign1 < 0) ? -1 : +1);
930
931	if (use_suffix) {
932		cmp_res = cmpsuffix(SI1, SI2);
933		if (cmp_res)
934			return ((sign1 < 0) ? -cmp_res : cmp_res);
935	}
936
937	/* if both numbers are empty assume that the strings are equal */
938	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
939		return (0);
940
941	/*
942	 * if the main part is of different size, we know the result
943	 * (because the leading zeros are removed)
944	 */
945	if (main1 < main2)
946		cmp_res = -1;
947	else if (main1 > main2)
948		cmp_res = +1;
949	/* if the sizes are equal then simple non-collate string compare gives the correct result */
950	else
951		cmp_res = wcscmp(smain1, smain2);
952
953	/* check fraction */
954	if (!cmp_res)
955		cmp_res = wcscmp(sfrac1, sfrac2);
956
957	if (!cmp_res)
958		return (0);
959
960	/* reverse result if the signs are negative */
961	if (sign1 < 0 && sign2 < 0)
962		cmp_res = -cmp_res;
963
964	return (cmp_res);
965}
966
967/*
968 * Implements numeric sort (-n).
969 */
970static int
971numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
972{
973
974	return (numcoll_impl(kv1, kv2, offset, false));
975}
976
977/*
978 * Implements 'human' numeric sort (-h).
979 */
980static int
981hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
982{
983
984	return (numcoll_impl(kv1, kv2, offset, true));
985}
986
987/*
988 * Implements random sort (-R).
989 */
990static int
991randomcoll(struct key_value *kv1, struct key_value *kv2,
992    size_t offset __unused)
993{
994	struct bwstring *s1, *s2;
995	MD5_CTX ctx1, ctx2;
996	char *b1, *b2;
997
998	s1 = kv1->k;
999	s2 = kv2->k;
1000
1001	if (debug_sort) {
1002		bwsprintf(stdout, s1, "; k1=<", ">");
1003		bwsprintf(stdout, s2, ", k2=<", ">");
1004	}
1005
1006	if (s1 == s2)
1007		return (0);
1008
1009	memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1010	memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1011
1012	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1013	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1014	b1 = MD5End(&ctx1, NULL);
1015	b2 = MD5End(&ctx2, NULL);
1016	if (b1 == NULL) {
1017		if (b2 == NULL)
1018			return (0);
1019		else {
1020			sort_free(b2);
1021			return (-1);
1022		}
1023	} else if (b2 == NULL) {
1024		sort_free(b1);
1025		return (+1);
1026	} else {
1027		int cmp_res;
1028
1029		cmp_res = strcmp(b1,b2);
1030		sort_free(b1);
1031		sort_free(b2);
1032
1033		if (!cmp_res)
1034			cmp_res = bwscoll(s1, s2, 0);
1035
1036		return (cmp_res);
1037	}
1038}
1039
1040/*
1041 * Implements version sort (-V).
1042 */
1043static int
1044versioncoll(struct key_value *kv1, struct key_value *kv2,
1045    size_t offset __unused)
1046{
1047	struct bwstring *s1, *s2;
1048
1049	s1 = kv1->k;
1050	s2 = kv2->k;
1051
1052	if (debug_sort) {
1053		bwsprintf(stdout, s1, "; k1=<", ">");
1054		bwsprintf(stdout, s2, ", k2=<", ">");
1055	}
1056
1057	if (s1 == s2)
1058		return (0);
1059
1060	return (vcmp(s1, s2));
1061}
1062
1063/*
1064 * Check for minus infinity
1065 */
1066static inline bool
1067huge_minus(double d, int err1)
1068{
1069
1070	if (err1 == ERANGE)
1071		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1072			return (+1);
1073
1074	return (0);
1075}
1076
1077/*
1078 * Check for plus infinity
1079 */
1080static inline bool
1081huge_plus(double d, int err1)
1082{
1083
1084	if (err1 == ERANGE)
1085		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1086			return (+1);
1087
1088	return (0);
1089}
1090
1091/*
1092 * Check whether a function is a NAN
1093 */
1094static bool
1095is_nan(double d)
1096{
1097
1098	return ((d == NAN) || (isnan(d)));
1099}
1100
1101/*
1102 * Compare two NANs
1103 */
1104static int
1105cmp_nans(double d1, double d2)
1106{
1107
1108	if (d1 < d2)
1109		return (-1);
1110	if (d1 > d2)
1111		return (+1);
1112	return (0);
1113}
1114
1115/*
1116 * Implements general numeric sort (-g).
1117 */
1118static int
1119gnumcoll(struct key_value *kv1, struct key_value *kv2,
1120    size_t offset __unused)
1121{
1122	double d1, d2;
1123	int err1, err2;
1124	bool empty1, empty2, key1_read, key2_read;
1125
1126	d1 = d2 = 0;
1127	err1 = err2 = 0;
1128	key1_read = key2_read = false;
1129
1130	if (debug_sort) {
1131		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1132		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1133	}
1134
1135	if (kv1->hint->status == HS_UNINITIALIZED) {
1136		errno = 0;
1137		d1 = bwstod(kv1->k, &empty1);
1138		err1 = errno;
1139
1140		if (empty1)
1141			kv1->hint->v.gh.notnum = true;
1142		else if (err1 == 0) {
1143			kv1->hint->v.gh.d = d1;
1144			kv1->hint->v.gh.nan = is_nan(d1);
1145			kv1->hint->status = HS_INITIALIZED;
1146		} else
1147			kv1->hint->status = HS_ERROR;
1148
1149		key1_read = true;
1150	}
1151
1152	if (kv2->hint->status == HS_UNINITIALIZED) {
1153		errno = 0;
1154		d2 = bwstod(kv2->k, &empty2);
1155		err2 = errno;
1156
1157		if (empty2)
1158			kv2->hint->v.gh.notnum = true;
1159		else if (err2 == 0) {
1160			kv2->hint->v.gh.d = d2;
1161			kv2->hint->v.gh.nan = is_nan(d2);
1162			kv2->hint->status = HS_INITIALIZED;
1163		} else
1164			kv2->hint->status = HS_ERROR;
1165
1166		key2_read = true;
1167	}
1168
1169	if (kv1->hint->status == HS_INITIALIZED &&
1170	    kv2->hint->status == HS_INITIALIZED) {
1171		if (kv1->hint->v.gh.notnum)
1172			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1173		else if (kv2->hint->v.gh.notnum)
1174			return (+1);
1175
1176		if (kv1->hint->v.gh.nan)
1177			return ((kv2->hint->v.gh.nan) ?
1178			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1179			    -1);
1180		else if (kv2->hint->v.gh.nan)
1181			return (+1);
1182
1183		d1 = kv1->hint->v.gh.d;
1184		d2 = kv2->hint->v.gh.d;
1185
1186		if (d1 < d2)
1187			return (-1);
1188		else if (d1 > d2)
1189			return (+1);
1190		else
1191			return (0);
1192	}
1193
1194	if (!key1_read) {
1195		errno = 0;
1196		d1 = bwstod(kv1->k, &empty1);
1197		err1 = errno;
1198	}
1199
1200	if (!key2_read) {
1201		errno = 0;
1202		d2 = bwstod(kv2->k, &empty2);
1203		err2 = errno;
1204	}
1205
1206	/* Non-value case: */
1207	if (empty1)
1208		return (empty2 ? 0 : -1);
1209	else if (empty2)
1210		return (+1);
1211
1212	/* NAN case */
1213	if (is_nan(d1))
1214		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1215	else if (is_nan(d2))
1216		return (+1);
1217
1218	/* Infinities */
1219	if (err1 == ERANGE || err2 == ERANGE) {
1220		/* Minus infinity case */
1221		if (huge_minus(d1, err1)) {
1222			if (huge_minus(d2, err2)) {
1223				if (d1 < d2)
1224					return (-1);
1225				if (d1 > d2)
1226					return (+1);
1227				return (0);
1228			} else
1229				return (-1);
1230
1231		} else if (huge_minus(d2, err2)) {
1232			if (huge_minus(d1, err1)) {
1233				if (d1 < d2)
1234					return (-1);
1235				if (d1 > d2)
1236					return (+1);
1237				return (0);
1238			} else
1239				return (+1);
1240		}
1241
1242		/* Plus infinity case */
1243		if (huge_plus(d1, err1)) {
1244			if (huge_plus(d2, err2)) {
1245				if (d1 < d2)
1246					return (-1);
1247				if (d1 > d2)
1248					return (+1);
1249				return (0);
1250			} else
1251				return (+1);
1252		} else if (huge_plus(d2, err2)) {
1253			if (huge_plus(d1, err1)) {
1254				if (d1 < d2)
1255					return (-1);
1256				if (d1 > d2)
1257					return (+1);
1258				return (0);
1259			} else
1260				return (-1);
1261		}
1262	}
1263
1264	if (d1 < d2)
1265		return (-1);
1266	if (d1 > d2)
1267		return (+1);
1268
1269	return (0);
1270}
1271
1272/*
1273 * Implements month sort (-M).
1274 */
1275static int
1276monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1277{
1278	int val1, val2;
1279	bool key1_read, key2_read;
1280
1281	val1 = val2 = 0;
1282	key1_read = key2_read = false;
1283
1284	if (debug_sort) {
1285		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1286		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1287	}
1288
1289	if (kv1->hint->status == HS_UNINITIALIZED) {
1290		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1291		key1_read = true;
1292		kv1->hint->status = HS_INITIALIZED;
1293	}
1294
1295	if (kv2->hint->status == HS_UNINITIALIZED) {
1296		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1297		key2_read = true;
1298		kv2->hint->status = HS_INITIALIZED;
1299	}
1300
1301	if (kv1->hint->status == HS_INITIALIZED) {
1302		val1 = kv1->hint->v.Mh.m;
1303		key1_read = true;
1304	}
1305
1306	if (kv2->hint->status == HS_INITIALIZED) {
1307		val2 = kv2->hint->v.Mh.m;
1308		key2_read = true;
1309	}
1310
1311	if (!key1_read)
1312		val1 = bws_month_score(kv1->k);
1313	if (!key2_read)
1314		val2 = bws_month_score(kv2->k);
1315
1316	if (val1 == val2) {
1317		return (0);
1318	}
1319	if (val1 < val2)
1320		return (-1);
1321	return (+1);
1322}
1323