1/*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/types.h>
32
33#include <errno.h>
34#include <err.h>
35#include <langinfo.h>
36#include <limits.h>
37#include <math.h>
38#include <md5.h>
39#include <stdlib.h>
40#include <string.h>
41#include <wchar.h>
42#include <wctype.h>
43
44#include "coll.h"
45#include "vsort.h"
46
47struct key_specs *keys;
48size_t keys_num = 0;
49
50wint_t symbol_decimal_point = L'.';
51/* there is no default thousands separator in collate rules: */
52wint_t symbol_thousands_sep = 0;
53wint_t symbol_negative_sign = L'-';
54wint_t symbol_positive_sign = L'+';
55
56static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
57static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
58static int monthcoll(struct key_value*, struct key_value *, size_t offset);
59static int numcoll(struct key_value*, struct key_value *, size_t offset);
60static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
61static int randomcoll(struct key_value*, struct key_value *, size_t offset);
62static int versioncoll(struct key_value*, struct key_value *, size_t offset);
63
64/*
65 * Allocate keys array
66 */
67struct keys_array *
68keys_array_alloc(void)
69{
70	struct keys_array *ka;
71	size_t sz;
72
73	sz = keys_array_size();
74	ka = sort_malloc(sz);
75	memset(ka, 0, sz);
76
77	return (ka);
78}
79
80/*
81 * Calculate whether we need key hint space
82 */
83static size_t
84key_hint_size(void)
85{
86
87	return (need_hint ? sizeof(struct key_hint) : 0);
88}
89
90/*
91 * Calculate keys array size
92 */
93size_t
94keys_array_size(void)
95{
96
97	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
98}
99
100/*
101 * Clean data of keys array
102 */
103void
104clean_keys_array(const struct bwstring *s, struct keys_array *ka)
105{
106
107	if (ka) {
108		for (size_t i = 0; i < keys_num; ++i)
109			if (ka->key[i].k && ka->key[i].k != s)
110				bwsfree(ka->key[i].k);
111		memset(ka, 0, keys_array_size());
112	}
113}
114
115/*
116 * Set value of a key in the keys set
117 */
118void
119set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
120{
121
122	if (ka && keys_num > ind) {
123		struct key_value *kv;
124
125		kv = &(ka->key[ind]);
126
127		if (kv->k && kv->k != s)
128			bwsfree(kv->k);
129		kv->k = s;
130	}
131}
132
133/*
134 * Initialize a sort list item
135 */
136struct sort_list_item *
137sort_list_item_alloc(void)
138{
139	struct sort_list_item *si;
140	size_t sz;
141
142	sz = sizeof(struct sort_list_item) + keys_array_size();
143	si = sort_malloc(sz);
144	memset(si, 0, sz);
145
146	return (si);
147}
148
149size_t
150sort_list_item_size(struct sort_list_item *si)
151{
152	size_t ret = 0;
153
154	if (si) {
155		ret = sizeof(struct sort_list_item) + keys_array_size();
156		if (si->str)
157			ret += bws_memsize(si->str);
158		for (size_t i = 0; i < keys_num; ++i) {
159			struct key_value *kv;
160
161			kv = &(si->ka.key[i]);
162
163			if (kv->k != si->str)
164				ret += bws_memsize(kv->k);
165		}
166	}
167	return (ret);
168}
169
170/*
171 * Calculate key for a sort list item
172 */
173static void
174sort_list_item_make_key(struct sort_list_item *si)
175{
176
177	preproc(si->str, &(si->ka));
178}
179
180/*
181 * Set value of a sort list item.
182 * Return combined string and keys memory size.
183 */
184void
185sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
186{
187
188	if (si) {
189		clean_keys_array(si->str, &(si->ka));
190		if (si->str) {
191			if (si->str == str) {
192				/* we are trying to reset the same string */
193				return;
194			} else {
195				bwsfree(si->str);
196				si->str = NULL;
197			}
198		}
199		si->str = str;
200		sort_list_item_make_key(si);
201	}
202}
203
204/*
205 * De-allocate a sort list item object memory
206 */
207void
208sort_list_item_clean(struct sort_list_item *si)
209{
210
211	if (si) {
212		clean_keys_array(si->str, &(si->ka));
213		if (si->str) {
214			bwsfree(si->str);
215			si->str = NULL;
216		}
217	}
218}
219
220/*
221 * Skip columns according to specs
222 */
223static size_t
224skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
225    bool skip_blanks, bool *empty_key)
226{
227	if (cols < 1)
228		return (BWSLEN(s) + 1);
229
230	if (skip_blanks)
231		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
232			++start;
233
234	while (start < BWSLEN(s) && cols > 1) {
235		--cols;
236		++start;
237	}
238
239	if (start >= BWSLEN(s))
240		*empty_key = true;
241
242	return (start);
243}
244
245/*
246 * Skip fields according to specs
247 */
248static size_t
249skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
250{
251
252	if (fields < 2) {
253		if (BWSLEN(s) == 0)
254			*empty_field = true;
255		return (0);
256	} else if (!(sort_opts_vals.tflag)) {
257		size_t cpos = 0;
258		bool pb = true;
259
260		while (cpos < BWSLEN(s)) {
261			bool isblank;
262
263			isblank = iswblank(BWS_GET(s, cpos));
264
265			if (isblank && !pb) {
266				--fields;
267				if (fields <= 1)
268					return (cpos);
269			}
270			pb = isblank;
271			++cpos;
272		}
273		if (fields > 1)
274			*empty_field = true;
275		return (cpos);
276	} else {
277		size_t cpos = 0;
278
279		while (cpos < BWSLEN(s)) {
280			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
281				--fields;
282				if (fields <= 1)
283					return (cpos + 1);
284			}
285			++cpos;
286		}
287		if (fields > 1)
288			*empty_field = true;
289		return (cpos);
290	}
291}
292
293/*
294 * Find fields start
295 */
296static void
297find_field_start(const struct bwstring *s, struct key_specs *ks,
298    size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
299{
300
301	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
302	if (!*empty_field)
303		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
304		    ks->pos1b, empty_key);
305	else
306		*empty_key = true;
307}
308
309/*
310 * Find end key position
311 */
312static size_t
313find_field_end(const struct bwstring *s, struct key_specs *ks)
314{
315	size_t f2, next_field_start, pos_end;
316	bool empty_field, empty_key;
317
318	pos_end = 0;
319	next_field_start = 0;
320	empty_field = false;
321	empty_key = false;
322	f2 = ks->f2;
323
324	if (f2 == 0)
325		return (BWSLEN(s) + 1);
326	else {
327		if (ks->c2 == 0) {
328			next_field_start = skip_fields_to_start(s, f2 + 1,
329			    &empty_field);
330			if ((next_field_start > 0) && sort_opts_vals.tflag &&
331			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
332			    next_field_start - 1)))
333				--next_field_start;
334		} else
335			next_field_start = skip_fields_to_start(s, f2,
336			    &empty_field);
337	}
338
339	if (empty_field || (next_field_start >= BWSLEN(s)))
340		return (BWSLEN(s) + 1);
341
342	if (ks->c2) {
343		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
344		    ks->pos2b, &empty_key);
345		if (pos_end < BWSLEN(s))
346			++pos_end;
347	} else
348		pos_end = next_field_start;
349
350	return (pos_end);
351}
352
353/*
354 * Cut a field according to the key specs
355 */
356static struct bwstring *
357cut_field(const struct bwstring *s, struct key_specs *ks)
358{
359	struct bwstring *ret = NULL;
360
361	if (s && ks) {
362		size_t field_start, key_end, key_start, sz;
363		bool empty_field, empty_key;
364
365		field_start = 0;
366		key_start = 0;
367		empty_field = false;
368		empty_key = false;
369
370		find_field_start(s, ks, &field_start, &key_start,
371		    &empty_field, &empty_key);
372
373		if (empty_key)
374			sz = 0;
375		else {
376			key_end = find_field_end(s, ks);
377			sz = (key_end < key_start) ? 0 : (key_end - key_start);
378		}
379
380		ret = bwsalloc(sz);
381		if (sz)
382			bwsnocpy(ret, s, key_start, sz);
383	} else
384		ret = bwsalloc(0);
385
386	return (ret);
387}
388
389/*
390 * Preprocesses a line applying the necessary transformations
391 * specified by command line options and returns the preprocessed
392 * string, which can be used to compare.
393 */
394int
395preproc(struct bwstring *s, struct keys_array *ka)
396{
397
398	if (sort_opts_vals.kflag)
399		for (size_t i = 0; i < keys_num; i++) {
400			struct bwstring *key;
401			struct key_specs *kspecs;
402			struct sort_mods *sm;
403
404			kspecs = &(keys[i]);
405			key = cut_field(s, kspecs);
406
407			sm = &(kspecs->sm);
408			if (sm->dflag)
409				key = dictionary_order(key);
410			else if (sm->iflag)
411				key = ignore_nonprinting(key);
412			if (sm->fflag || sm->Mflag)
413				key = ignore_case(key);
414
415			set_key_on_keys_array(ka, key, i);
416		}
417	else {
418		struct bwstring *ret = NULL;
419		struct sort_mods *sm = default_sort_mods;
420
421		if (sm->bflag) {
422			if (ret == NULL)
423				ret = bwsdup(s);
424			ret = ignore_leading_blanks(ret);
425		}
426		if (sm->dflag) {
427			if (ret == NULL)
428				ret = bwsdup(s);
429			ret = dictionary_order(ret);
430		} else if (sm->iflag) {
431			if (ret == NULL)
432				ret = bwsdup(s);
433			ret = ignore_nonprinting(ret);
434		}
435		if (sm->fflag || sm->Mflag) {
436			if (ret == NULL)
437				ret = bwsdup(s);
438			ret = ignore_case(ret);
439		}
440		if (ret == NULL)
441			set_key_on_keys_array(ka, s, 0);
442		else
443			set_key_on_keys_array(ka, ret, 0);
444	}
445
446	return 0;
447}
448
449cmpcoll_t
450get_sort_func(struct sort_mods *sm)
451{
452
453	if (sm->nflag)
454		return (numcoll);
455	else if (sm->hflag)
456		return (hnumcoll);
457	else if (sm->gflag)
458		return (gnumcoll);
459	else if (sm->Mflag)
460		return (monthcoll);
461	else if (sm->Rflag)
462		return (randomcoll);
463	else if (sm->Vflag)
464		return (versioncoll);
465	else
466		return (wstrcoll);
467}
468
469/*
470 * Compares the given strings.  Returns a positive number if
471 * the first precedes the second, a negative number if the second is
472 * the preceding one, and zero if they are equal.  This function calls
473 * the underlying collate functions, which done the actual comparison.
474 */
475int
476key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
477{
478	struct sort_mods *sm;
479	int res = 0;
480
481	for (size_t i = 0; i < keys_num; ++i) {
482		sm = &(keys[i].sm);
483
484		if (sm->rflag)
485			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
486		else
487			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
488
489		if (res)
490			break;
491
492		/* offset applies to only the first key */
493		offset = 0;
494	}
495	return (res);
496}
497
498/*
499 * Compare two strings.
500 * Plain symbol-by-symbol comparison.
501 */
502int
503top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
504{
505
506	if (default_sort_mods->rflag) {
507		const struct bwstring *tmp;
508
509		tmp = s1;
510		s1 = s2;
511		s2 = tmp;
512	}
513
514	return (bwscoll(s1, s2, 0));
515}
516
517/*
518 * Compare a string and a sort list item, according to the sort specs.
519 */
520int
521str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
522{
523	struct keys_array *ka1;
524	int ret = 0;
525
526	ka1 = keys_array_alloc();
527
528	preproc(str1, ka1);
529
530	sort_list_item_make_key(*ss2);
531
532	if (debug_sort) {
533		bwsprintf(stdout, str1, "; s1=<", ">");
534		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
535	}
536
537	ret = key_coll(ka1, &((*ss2)->ka), 0);
538
539	if (debug_sort)
540		printf("; cmp1=%d", ret);
541
542	clean_keys_array(str1, ka1);
543	sort_free(ka1);
544
545	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
546		ret = top_level_str_coll(str1, ((*ss2)->str));
547		if (debug_sort)
548			printf("; cmp2=%d", ret);
549	}
550
551	if (debug_sort)
552		printf("\n");
553
554	return (ret);
555}
556
557/*
558 * Compare two sort list items, according to the sort specs.
559 */
560int
561list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
562    size_t offset)
563{
564	int ret;
565
566	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
567
568	if (debug_sort) {
569		if (offset)
570			printf("; offset=%d", (int) offset);
571		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
572		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
573		printf("; cmp1=%d\n", ret);
574	}
575
576	if (ret)
577		return (ret);
578
579	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
580		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
581		if (debug_sort)
582			printf("; cmp2=%d\n", ret);
583	}
584
585	return (ret);
586}
587
588/*
589 * Compare two sort list items, according to the sort specs.
590 */
591int
592list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
593{
594
595	return (list_coll_offset(ss1, ss2, 0));
596}
597
598#define LSCDEF(N) 											\
599static int 												\
600list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
601{													\
602													\
603	return (list_coll_offset(ss1, ss2, N));								\
604}
605
606LSCDEF(1)
607LSCDEF(2)
608LSCDEF(3)
609LSCDEF(4)
610LSCDEF(5)
611LSCDEF(6)
612LSCDEF(7)
613LSCDEF(8)
614LSCDEF(9)
615LSCDEF(10)
616LSCDEF(11)
617LSCDEF(12)
618LSCDEF(13)
619LSCDEF(14)
620LSCDEF(15)
621LSCDEF(16)
622LSCDEF(17)
623LSCDEF(18)
624LSCDEF(19)
625LSCDEF(20)
626
627listcoll_t
628get_list_call_func(size_t offset)
629{
630	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
631	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
632	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
633	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
634	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
635	    list_coll_18, list_coll_19, list_coll_20 };
636
637	if (offset <= 20)
638		return (lsarray[offset]);
639
640	return (list_coll);
641}
642
643/*
644 * Compare two sort list items, only by their original string.
645 */
646int
647list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
648{
649
650	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
651}
652
653/*
654 * Maximum size of a number in the string (before or after decimal point)
655 */
656#define MAX_NUM_SIZE (128)
657
658/*
659 * Set suffix value
660 */
661static void setsuffix(wchar_t c, unsigned char *si)
662{
663	switch (c){
664	case L'k':
665	case L'K':
666		*si = 1;
667		break;
668	case L'M':
669		*si = 2;
670		break;
671	case L'G':
672		*si = 3;
673		break;
674	case L'T':
675		*si = 4;
676		break;
677	case L'P':
678		*si = 5;
679		break;
680	case L'E':
681		*si = 6;
682		break;
683	case L'Z':
684		*si = 7;
685		break;
686	case L'Y':
687		*si = 8;
688		break;
689	default:
690		*si = 0;
691	};
692}
693
694/*
695 * Read string s and parse the string into a fixed-decimal-point number.
696 * sign equals -1 if the number is negative (explicit plus is not allowed,
697 * according to GNU sort's "info sort".
698 * The number part before decimal point is in the smain, after the decimal
699 * point is in sfrac, tail is the pointer to the remainder of the string.
700 */
701static int
702read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
703{
704	bwstring_iterator s;
705
706	s = bws_begin(s0);
707
708	/* always end the fraction with zero, even if we have no fraction */
709	sfrac[0] = 0;
710
711	while (iswblank(bws_get_iter_value(s)))
712		s = bws_iterator_inc(s, 1);
713
714	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
715		*sign = -1;
716		s = bws_iterator_inc(s, 1);
717	}
718
719	// This is '0', not '\0', do not change this
720	while (iswdigit(bws_get_iter_value(s)) &&
721	    (bws_get_iter_value(s) == L'0'))
722		s = bws_iterator_inc(s, 1);
723
724	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
725		if (iswdigit(bws_get_iter_value(s))) {
726			smain[*main_len] = bws_get_iter_value(s);
727			s = bws_iterator_inc(s, 1);
728			*main_len += 1;
729		} else if (symbol_thousands_sep &&
730		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
731			s = bws_iterator_inc(s, 1);
732		else
733			break;
734	}
735
736	smain[*main_len] = 0;
737
738	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
739		s = bws_iterator_inc(s, 1);
740		while (iswdigit(bws_get_iter_value(s)) &&
741		    *frac_len < MAX_NUM_SIZE) {
742			sfrac[*frac_len] = bws_get_iter_value(s);
743			s = bws_iterator_inc(s, 1);
744			*frac_len += 1;
745		}
746		sfrac[*frac_len] = 0;
747
748		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
749			--(*frac_len);
750			sfrac[*frac_len] = L'\0';
751		}
752	}
753
754	setsuffix(bws_get_iter_value(s),si);
755
756	if ((*main_len + *frac_len) == 0)
757		*sign = 0;
758
759	return (0);
760}
761
762/*
763 * Implements string sort.
764 */
765static int
766wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
767{
768
769	if (debug_sort) {
770		if (offset)
771			printf("; offset=%d\n", (int) offset);
772		bwsprintf(stdout, kv1->k, "; k1=<", ">");
773		printf("(%zu)", BWSLEN(kv1->k));
774		bwsprintf(stdout, kv2->k, ", k2=<", ">");
775		printf("(%zu)", BWSLEN(kv2->k));
776	}
777
778	return (bwscoll(kv1->k, kv2->k, offset));
779}
780
781/*
782 * Compare two suffixes
783 */
784static inline int
785cmpsuffix(unsigned char si1, unsigned char si2)
786{
787
788	return ((char)si1 - (char)si2);
789}
790
791/*
792 * Implements numeric sort for -n and -h.
793 */
794static int
795numcoll_impl(struct key_value *kv1, struct key_value *kv2,
796    size_t offset __unused, bool use_suffix)
797{
798	struct bwstring *s1, *s2;
799	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
800	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
801	int cmp_res, sign1, sign2;
802	size_t frac1, frac2, main1, main2;
803	unsigned char SI1, SI2;
804	bool e1, e2, key1_read, key2_read;
805
806	s1 = kv1->k;
807	s2 = kv2->k;
808	sign1 = sign2 = 0;
809	main1 = main2 = 0;
810	frac1 = frac2 = 0;
811
812	cmp_res = 0;
813	key1_read = key2_read = false;
814
815	if (debug_sort) {
816		bwsprintf(stdout, s1, "; k1=<", ">");
817		bwsprintf(stdout, s2, ", k2=<", ">");
818	}
819
820	if (s1 == s2)
821		return (0);
822
823	if (kv1->hint->status == HS_UNINITIALIZED) {
824		/* read the number from the string */
825		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
826		key1_read = true;
827		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
828		if(main1 < 1 && frac1 < 1)
829			kv1->hint->v.nh.empty=true;
830		kv1->hint->v.nh.si = SI1;
831		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
832		    HS_INITIALIZED : HS_ERROR;
833		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
834	}
835
836	if (kv2->hint->status == HS_UNINITIALIZED) {
837		/* read the number from the string */
838		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
839		key2_read = true;
840		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
841		if(main2 < 1 && frac2 < 1)
842			kv2->hint->v.nh.empty=true;
843		kv2->hint->v.nh.si = SI2;
844		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
845		    HS_INITIALIZED : HS_ERROR;
846		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
847	}
848
849	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
850	    HS_INITIALIZED) {
851		unsigned long long n1, n2;
852		bool neg1, neg2;
853
854		e1 = kv1->hint->v.nh.empty;
855		e2 = kv2->hint->v.nh.empty;
856
857		if (e1 && e2)
858			return (0);
859
860		neg1 = kv1->hint->v.nh.neg;
861		neg2 = kv2->hint->v.nh.neg;
862
863		if (neg1 && !neg2)
864			return (-1);
865		if (neg2 && !neg1)
866			return (+1);
867
868		if (e1)
869			return (neg2 ? +1 : -1);
870		else if (e2)
871			return (neg1 ? -1 : +1);
872
873
874		if (use_suffix) {
875			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
876			if (cmp_res)
877				return (neg1 ? -cmp_res : cmp_res);
878		}
879
880		n1 = kv1->hint->v.nh.n1;
881		n2 = kv2->hint->v.nh.n1;
882		if (n1 < n2)
883			return (neg1 ? +1 : -1);
884		else if (n1 > n2)
885			return (neg1 ? -1 : +1);
886	}
887
888	/* read the numbers from the strings */
889	if (!key1_read)
890		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
891	if (!key2_read)
892		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
893
894	e1 = ((main1 + frac1) == 0);
895	e2 = ((main2 + frac2) == 0);
896
897	if (e1 && e2)
898		return (0);
899
900	/* we know the result if the signs are different */
901	if (sign1 < 0 && sign2 >= 0)
902		return (-1);
903	if (sign1 >= 0 && sign2 < 0)
904		return (+1);
905
906	if (e1)
907		return ((sign2 < 0) ? +1 : -1);
908	else if (e2)
909		return ((sign1 < 0) ? -1 : +1);
910
911	if (use_suffix) {
912		cmp_res = cmpsuffix(SI1, SI2);
913		if (cmp_res)
914			return ((sign1 < 0) ? -cmp_res : cmp_res);
915	}
916
917	/* if both numbers are empty assume that the strings are equal */
918	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
919		return (0);
920
921	/*
922	 * if the main part is of different size, we know the result
923	 * (because the leading zeros are removed)
924	 */
925	if (main1 < main2)
926		cmp_res = -1;
927	else if (main1 > main2)
928		cmp_res = +1;
929	/* if the sizes are equal then simple non-collate string compare gives the correct result */
930	else
931		cmp_res = wcscmp(smain1, smain2);
932
933	/* check fraction */
934	if (!cmp_res)
935		cmp_res = wcscmp(sfrac1, sfrac2);
936
937	if (!cmp_res)
938		return (0);
939
940	/* reverse result if the signs are negative */
941	if (sign1 < 0 && sign2 < 0)
942		cmp_res = -cmp_res;
943
944	return (cmp_res);
945}
946
947/*
948 * Implements numeric sort (-n).
949 */
950static int
951numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
952{
953
954	return (numcoll_impl(kv1, kv2, offset, false));
955}
956
957/*
958 * Implements 'human' numeric sort (-h).
959 */
960static int
961hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
962{
963
964	return (numcoll_impl(kv1, kv2, offset, true));
965}
966
967/*
968 * Implements random sort (-R).
969 */
970static int
971randomcoll(struct key_value *kv1, struct key_value *kv2,
972    size_t offset __unused)
973{
974	struct bwstring *s1, *s2;
975	MD5_CTX ctx1, ctx2;
976	char *b1, *b2;
977
978	s1 = kv1->k;
979	s2 = kv2->k;
980
981	if (debug_sort) {
982		bwsprintf(stdout, s1, "; k1=<", ">");
983		bwsprintf(stdout, s2, ", k2=<", ">");
984	}
985
986	if (s1 == s2)
987		return (0);
988
989	memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
990	memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
991
992	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
993	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
994	b1 = MD5End(&ctx1, NULL);
995	b2 = MD5End(&ctx2, NULL);
996	if (b1 == NULL) {
997		if (b2 == NULL)
998			return (0);
999		else {
1000			sort_free(b2);
1001			return (-1);
1002		}
1003	} else if (b2 == NULL) {
1004		sort_free(b1);
1005		return (+1);
1006	} else {
1007		int cmp_res;
1008
1009		cmp_res = strcmp(b1,b2);
1010		sort_free(b1);
1011		sort_free(b2);
1012
1013		if (!cmp_res)
1014			cmp_res = bwscoll(s1, s2, 0);
1015
1016		return (cmp_res);
1017	}
1018}
1019
1020/*
1021 * Implements version sort (-V).
1022 */
1023static int
1024versioncoll(struct key_value *kv1, struct key_value *kv2,
1025    size_t offset __unused)
1026{
1027	struct bwstring *s1, *s2;
1028
1029	s1 = kv1->k;
1030	s2 = kv2->k;
1031
1032	if (debug_sort) {
1033		bwsprintf(stdout, s1, "; k1=<", ">");
1034		bwsprintf(stdout, s2, ", k2=<", ">");
1035	}
1036
1037	if (s1 == s2)
1038		return (0);
1039
1040	return (vcmp(s1, s2));
1041}
1042
1043/*
1044 * Check for minus infinity
1045 */
1046static inline bool
1047huge_minus(double d, int err1)
1048{
1049
1050	if (err1 == ERANGE)
1051		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1052			return (+1);
1053
1054	return (0);
1055}
1056
1057/*
1058 * Check for plus infinity
1059 */
1060static inline bool
1061huge_plus(double d, int err1)
1062{
1063
1064	if (err1 == ERANGE)
1065		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1066			return (+1);
1067
1068	return (0);
1069}
1070
1071/*
1072 * Check whether a function is a NAN
1073 */
1074static bool
1075is_nan(double d)
1076{
1077
1078	return ((d == NAN) || (isnan(d)));
1079}
1080
1081/*
1082 * Compare two NANs
1083 */
1084static int
1085cmp_nans(double d1, double d2)
1086{
1087
1088	if (d1 < d2)
1089		return (-1);
1090	if (d2 > d2)
1091		return (+1);
1092	return (0);
1093}
1094
1095/*
1096 * Implements general numeric sort (-g).
1097 */
1098static int
1099gnumcoll(struct key_value *kv1, struct key_value *kv2,
1100    size_t offset __unused)
1101{
1102	double d1, d2;
1103	int err1, err2;
1104	bool empty1, empty2, key1_read, key2_read;
1105
1106	d1 = d2 = 0;
1107	err1 = err2 = 0;
1108	key1_read = key2_read = false;
1109
1110	if (debug_sort) {
1111		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1112		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1113	}
1114
1115	if (kv1->hint->status == HS_UNINITIALIZED) {
1116		errno = 0;
1117		d1 = bwstod(kv1->k, &empty1);
1118		err1 = errno;
1119
1120		if (empty1)
1121			kv1->hint->v.gh.notnum = true;
1122		else if (err1 == 0) {
1123			kv1->hint->v.gh.d = d1;
1124			kv1->hint->v.gh.nan = is_nan(d1);
1125			kv1->hint->status = HS_INITIALIZED;
1126		} else
1127			kv1->hint->status = HS_ERROR;
1128
1129		key1_read = true;
1130	}
1131
1132	if (kv2->hint->status == HS_UNINITIALIZED) {
1133		errno = 0;
1134		d2 = bwstod(kv2->k, &empty2);
1135		err2 = errno;
1136
1137		if (empty2)
1138			kv2->hint->v.gh.notnum = true;
1139		else if (err2 == 0) {
1140			kv2->hint->v.gh.d = d2;
1141			kv2->hint->v.gh.nan = is_nan(d2);
1142			kv2->hint->status = HS_INITIALIZED;
1143		} else
1144			kv2->hint->status = HS_ERROR;
1145
1146		key2_read = true;
1147	}
1148
1149	if (kv1->hint->status == HS_INITIALIZED &&
1150	    kv2->hint->status == HS_INITIALIZED) {
1151		if (kv1->hint->v.gh.notnum)
1152			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1153		else if (kv2->hint->v.gh.notnum)
1154			return (+1);
1155
1156		if (kv1->hint->v.gh.nan)
1157			return ((kv2->hint->v.gh.nan) ?
1158			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1159			    -1);
1160		else if (kv2->hint->v.gh.nan)
1161			return (+1);
1162
1163		d1 = kv1->hint->v.gh.d;
1164		d2 = kv2->hint->v.gh.d;
1165
1166		if (d1 < d2)
1167			return (-1);
1168		else if (d1 > d2)
1169			return (+1);
1170		else
1171			return (0);
1172	}
1173
1174	if (!key1_read) {
1175		errno = 0;
1176		d1 = bwstod(kv1->k, &empty1);
1177		err1 = errno;
1178	}
1179
1180	if (!key2_read) {
1181		errno = 0;
1182		d2 = bwstod(kv2->k, &empty2);
1183		err2 = errno;
1184	}
1185
1186	/* Non-value case: */
1187	if (empty1)
1188		return (empty2 ? 0 : -1);
1189	else if (empty2)
1190		return (+1);
1191
1192	/* NAN case */
1193	if (is_nan(d1))
1194		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1195	else if (is_nan(d2))
1196		return (+1);
1197
1198	/* Infinities */
1199	if (err1 == ERANGE || err2 == ERANGE) {
1200		/* Minus infinity case */
1201		if (huge_minus(d1, err1)) {
1202			if (huge_minus(d2, err2)) {
1203				if (d1 < d2)
1204					return (-1);
1205				if (d1 > d2)
1206					return (+1);
1207				return (0);
1208			} else
1209				return (-1);
1210
1211		} else if (huge_minus(d2, err2)) {
1212			if (huge_minus(d1, err1)) {
1213				if (d1 < d2)
1214					return (-1);
1215				if (d1 > d2)
1216					return (+1);
1217				return (0);
1218			} else
1219				return (+1);
1220		}
1221
1222		/* Plus infinity case */
1223		if (huge_plus(d1, err1)) {
1224			if (huge_plus(d2, err2)) {
1225				if (d1 < d2)
1226					return (-1);
1227				if (d1 > d2)
1228					return (+1);
1229				return (0);
1230			} else
1231				return (+1);
1232		} else if (huge_plus(d2, err2)) {
1233			if (huge_plus(d1, err1)) {
1234				if (d1 < d2)
1235					return (-1);
1236				if (d1 > d2)
1237					return (+1);
1238				return (0);
1239			} else
1240				return (-1);
1241		}
1242	}
1243
1244	if (d1 < d2)
1245		return (-1);
1246	if (d1 > d2)
1247		return (+1);
1248
1249	return (0);
1250}
1251
1252/*
1253 * Implements month sort (-M).
1254 */
1255static int
1256monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1257{
1258	int val1, val2;
1259	bool key1_read, key2_read;
1260
1261	val1 = val2 = 0;
1262	key1_read = key2_read = false;
1263
1264	if (debug_sort) {
1265		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1266		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1267	}
1268
1269	if (kv1->hint->status == HS_UNINITIALIZED) {
1270		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1271		key1_read = true;
1272		kv1->hint->status = HS_INITIALIZED;
1273	}
1274
1275	if (kv2->hint->status == HS_UNINITIALIZED) {
1276		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1277		key2_read = true;
1278		kv2->hint->status = HS_INITIALIZED;
1279	}
1280
1281	if (kv1->hint->status == HS_INITIALIZED) {
1282		val1 = kv1->hint->v.Mh.m;
1283		key1_read = true;
1284	}
1285
1286	if (kv2->hint->status == HS_INITIALIZED) {
1287		val2 = kv2->hint->v.Mh.m;
1288		key2_read = true;
1289	}
1290
1291	if (!key1_read)
1292		val1 = bws_month_score(kv1->k);
1293	if (!key2_read)
1294		val2 = bws_month_score(kv2->k);
1295
1296	if (val1 == val2) {
1297		return (0);
1298	}
1299	if (val1 < val2)
1300		return (-1);
1301	return (+1);
1302}
1303