1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include <sys/types.h>
34
35#include <errno.h>
36#include <err.h>
37#include <langinfo.h>
38#include <limits.h>
39#include <math.h>
40#include <md5.h>
41#include <stdlib.h>
42#include <string.h>
43#include <wchar.h>
44#include <wctype.h>
45
46#include "coll.h"
47#include "vsort.h"
48
/* Table of key specifications and the number of entries in it. */
struct key_specs *keys = NULL;
size_t keys_num = 0;

/*
 * Characters recognised while parsing numbers.  A thousands separator of 0
 * means "none": there is no default thousands separator in collate rules.
 */
wint_t symbol_decimal_point = L'.';
wint_t symbol_thousands_sep = 0;
wint_t symbol_negative_sign = L'-';
wint_t symbol_positive_sign = L'+';
57
/* Per-key comparison routines; get_sort_func() chooses among them. */
static int wstrcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int gnumcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int monthcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int numcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int hnumcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int randomcoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
static int versioncoll(struct key_value *kv1, struct key_value *kv2,
    size_t offset);
65
/*
 * Allocate a keys array of keys_array_size() bytes and zero-fill it.
 * The memory comes from sort_malloc().
 */
struct keys_array *
keys_array_alloc(void)
{
	const size_t nbytes = keys_array_size();
	struct keys_array *arr = sort_malloc(nbytes);

	return (memset(arr, 0, nbytes));
}
81
82/*
83 * Calculate whether we need key hint space
84 */
85static size_t
86key_hint_size(void)
87{
88
89	return (need_hint ? sizeof(struct key_hint) : 0);
90}
91
92/*
93 * Calculate keys array size
94 */
95size_t
96keys_array_size(void)
97{
98
99	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
100}
101
102/*
103 * Clean data of keys array
104 */
105void
106clean_keys_array(const struct bwstring *s, struct keys_array *ka)
107{
108
109	if (ka) {
110		for (size_t i = 0; i < keys_num; ++i) {
111			const struct key_value *kv;
112
113			kv = get_key_from_keys_array(ka, i);
114			if (kv->k && kv->k != s)
115				bwsfree(kv->k);
116		}
117		memset(ka, 0, keys_array_size());
118	}
119}
120
121/*
122 * Get pointer to a key value in the keys set
123 */
124struct key_value *
125get_key_from_keys_array(struct keys_array *ka, size_t ind)
126{
127
128	return ((struct key_value *)((caddr_t)ka->key +
129	    ind * (sizeof(struct key_value) + key_hint_size())));
130}
131
132/*
133 * Set value of a key in the keys set
134 */
135void
136set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
137{
138
139	if (ka && keys_num > ind) {
140		struct key_value *kv;
141
142		kv = get_key_from_keys_array(ka, ind);
143
144		if (kv->k && kv->k != s)
145			bwsfree(kv->k);
146		kv->k = s;
147	}
148}
149
150/*
151 * Initialize a sort list item
152 */
153struct sort_list_item *
154sort_list_item_alloc(void)
155{
156	struct sort_list_item *si;
157	size_t sz;
158
159	sz = sizeof(struct sort_list_item) + keys_array_size();
160	si = sort_malloc(sz);
161	memset(si, 0, sz);
162
163	return (si);
164}
165
166size_t
167sort_list_item_size(struct sort_list_item *si)
168{
169	size_t ret = 0;
170
171	if (si) {
172		ret = sizeof(struct sort_list_item) + keys_array_size();
173		if (si->str)
174			ret += bws_memsize(si->str);
175		for (size_t i = 0; i < keys_num; ++i) {
176			const struct key_value *kv;
177
178			kv = get_key_from_keys_array(&si->ka, i);
179
180			if (kv->k != si->str)
181				ret += bws_memsize(kv->k);
182		}
183	}
184	return (ret);
185}
186
187/*
188 * Calculate key for a sort list item
189 */
190static void
191sort_list_item_make_key(struct sort_list_item *si)
192{
193
194	preproc(si->str, &(si->ka));
195}
196
197/*
198 * Set value of a sort list item.
199 * Return combined string and keys memory size.
200 */
201void
202sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
203{
204
205	if (si) {
206		clean_keys_array(si->str, &(si->ka));
207		if (si->str) {
208			if (si->str == str) {
209				/* we are trying to reset the same string */
210				return;
211			} else {
212				bwsfree(si->str);
213				si->str = NULL;
214			}
215		}
216		si->str = str;
217		sort_list_item_make_key(si);
218	}
219}
220
221/*
222 * De-allocate a sort list item object memory
223 */
224void
225sort_list_item_clean(struct sort_list_item *si)
226{
227
228	if (si) {
229		clean_keys_array(si->str, &(si->ka));
230		if (si->str) {
231			bwsfree(si->str);
232			si->str = NULL;
233		}
234	}
235}
236
/*
 * Advance from `start' to column `cols' (counted from 1) of the string.
 *
 * With cols < 1 the result is one past the string length.  Otherwise
 * leading blanks at `start' are skipped first when skip_blanks is set,
 * then the position moves forward cols - 1 characters, stopping at the
 * end of the string.  *empty_key is set to true when the resulting
 * position is at or beyond the end; it is never cleared here.
 */
static size_t
skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
    bool skip_blanks, bool *empty_key)
{
	size_t pos = start;

	if (cols < 1)
		return (BWSLEN(s) + 1);

	if (skip_blanks) {
		for (; pos < BWSLEN(s); pos++) {
			if (!iswblank(BWS_GET(s, pos)))
				break;
		}
	}

	for (; cols > 1 && pos < BWSLEN(s); cols--)
		pos++;

	if (pos >= BWSLEN(s))
		*empty_key = true;

	return (pos);
}
261
262/*
263 * Skip fields according to specs
264 */
265static size_t
266skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
267{
268
269	if (fields < 2) {
270		if (BWSLEN(s) == 0)
271			*empty_field = true;
272		return (0);
273	} else if (!(sort_opts_vals.tflag)) {
274		size_t cpos = 0;
275		bool pb = true;
276
277		while (cpos < BWSLEN(s)) {
278			bool isblank;
279
280			isblank = iswblank(BWS_GET(s, cpos));
281
282			if (isblank && !pb) {
283				--fields;
284				if (fields <= 1)
285					return (cpos);
286			}
287			pb = isblank;
288			++cpos;
289		}
290		if (fields > 1)
291			*empty_field = true;
292		return (cpos);
293	} else {
294		size_t cpos = 0;
295
296		while (cpos < BWSLEN(s)) {
297			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
298				--fields;
299				if (fields <= 1)
300					return (cpos + 1);
301			}
302			++cpos;
303		}
304		if (fields > 1)
305			*empty_field = true;
306		return (cpos);
307	}
308}
309
310/*
311 * Find fields start
312 */
313static void
314find_field_start(const struct bwstring *s, struct key_specs *ks,
315    size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
316{
317
318	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
319	if (!*empty_field)
320		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
321		    ks->pos1b, empty_key);
322	else
323		*empty_key = true;
324}
325
326/*
327 * Find end key position
328 */
329static size_t
330find_field_end(const struct bwstring *s, struct key_specs *ks)
331{
332	size_t f2, next_field_start, pos_end;
333	bool empty_field, empty_key;
334
335	empty_field = false;
336	empty_key = false;
337	f2 = ks->f2;
338
339	if (f2 == 0)
340		return (BWSLEN(s) + 1);
341	else {
342		if (ks->c2 == 0) {
343			next_field_start = skip_fields_to_start(s, f2 + 1,
344			    &empty_field);
345			if ((next_field_start > 0) && sort_opts_vals.tflag &&
346			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
347			    next_field_start - 1)))
348				--next_field_start;
349		} else
350			next_field_start = skip_fields_to_start(s, f2,
351			    &empty_field);
352	}
353
354	if (empty_field || (next_field_start >= BWSLEN(s)))
355		return (BWSLEN(s) + 1);
356
357	if (ks->c2) {
358		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
359		    ks->pos2b, &empty_key);
360		if (pos_end < BWSLEN(s))
361			++pos_end;
362	} else
363		pos_end = next_field_start;
364
365	return (pos_end);
366}
367
/*
 * Extract the key described by `ks' from line `s' into a newly allocated
 * string.  The result has length zero when either argument is NULL, when
 * the key is empty, or when the computed end precedes the start.
 */
static struct bwstring *
cut_field(const struct bwstring *s, struct key_specs *ks)
{
	struct bwstring *out;
	size_t first = 0, from = 0, len = 0;
	bool no_field = false, no_key = false;

	if (s == NULL || ks == NULL)
		return (bwsalloc(0));

	find_field_start(s, ks, &first, &from, &no_field, &no_key);

	if (!no_key) {
		const size_t to = find_field_end(s, ks);

		if (to >= from)
			len = to - from;
	}

	out = bwsalloc(len);
	if (len != 0)
		bwsnocpy(out, s, from, len);

	return (out);
}
403
404/*
405 * Preprocesses a line applying the necessary transformations
406 * specified by command line options and returns the preprocessed
407 * string, which can be used to compare.
408 */
409int
410preproc(struct bwstring *s, struct keys_array *ka)
411{
412
413	if (sort_opts_vals.kflag)
414		for (size_t i = 0; i < keys_num; i++) {
415			struct bwstring *key;
416			struct key_specs *kspecs;
417			struct sort_mods *sm;
418
419			kspecs = &(keys[i]);
420			key = cut_field(s, kspecs);
421
422			sm = &(kspecs->sm);
423			if (sm->dflag)
424				key = dictionary_order(key);
425			else if (sm->iflag)
426				key = ignore_nonprinting(key);
427			if (sm->fflag || sm->Mflag)
428				key = ignore_case(key);
429
430			set_key_on_keys_array(ka, key, i);
431		}
432	else {
433		struct bwstring *ret = NULL;
434		struct sort_mods *sm = default_sort_mods;
435
436		if (sm->bflag) {
437			if (ret == NULL)
438				ret = bwsdup(s);
439			ret = ignore_leading_blanks(ret);
440		}
441		if (sm->dflag) {
442			if (ret == NULL)
443				ret = bwsdup(s);
444			ret = dictionary_order(ret);
445		} else if (sm->iflag) {
446			if (ret == NULL)
447				ret = bwsdup(s);
448			ret = ignore_nonprinting(ret);
449		}
450		if (sm->fflag || sm->Mflag) {
451			if (ret == NULL)
452				ret = bwsdup(s);
453			ret = ignore_case(ret);
454		}
455		if (ret == NULL)
456			set_key_on_keys_array(ka, s, 0);
457		else
458			set_key_on_keys_array(ka, ret, 0);
459	}
460
461	return 0;
462}
463
464cmpcoll_t
465get_sort_func(struct sort_mods *sm)
466{
467
468	if (sm->nflag)
469		return (numcoll);
470	else if (sm->hflag)
471		return (hnumcoll);
472	else if (sm->gflag)
473		return (gnumcoll);
474	else if (sm->Mflag)
475		return (monthcoll);
476	else if (sm->Rflag)
477		return (randomcoll);
478	else if (sm->Vflag)
479		return (versioncoll);
480	else
481		return (wstrcoll);
482}
483
484/*
485 * Compares the given strings.  Returns a positive number if
486 * the first precedes the second, a negative number if the second is
487 * the preceding one, and zero if they are equal.  This function calls
488 * the underlying collate functions, which done the actual comparison.
489 */
490int
491key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
492{
493	struct key_value *kv1, *kv2;
494	struct sort_mods *sm;
495	int res = 0;
496
497	for (size_t i = 0; i < keys_num; ++i) {
498		kv1 = get_key_from_keys_array(ps1, i);
499		kv2 = get_key_from_keys_array(ps2, i);
500		sm = &(keys[i].sm);
501
502		if (sm->rflag)
503			res = sm->func(kv2, kv1, offset);
504		else
505			res = sm->func(kv1, kv2, offset);
506
507		if (res)
508			break;
509
510		/* offset applies to only the first key */
511		offset = 0;
512	}
513	return (res);
514}
515
516/*
517 * Compare two strings.
518 * Plain symbol-by-symbol comparison.
519 */
520int
521top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
522{
523
524	if (default_sort_mods->rflag) {
525		const struct bwstring *tmp;
526
527		tmp = s1;
528		s1 = s2;
529		s2 = tmp;
530	}
531
532	return (bwscoll(s1, s2, 0));
533}
534
535/*
536 * Compare a string and a sort list item, according to the sort specs.
537 */
538int
539str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
540{
541	struct keys_array *ka1;
542	int ret = 0;
543
544	ka1 = keys_array_alloc();
545
546	preproc(str1, ka1);
547
548	sort_list_item_make_key(*ss2);
549
550	if (debug_sort) {
551		bwsprintf(stdout, str1, "; s1=<", ">");
552		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
553	}
554
555	ret = key_coll(ka1, &((*ss2)->ka), 0);
556
557	if (debug_sort)
558		printf("; cmp1=%d", ret);
559
560	clean_keys_array(str1, ka1);
561	sort_free(ka1);
562
563	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
564		ret = top_level_str_coll(str1, ((*ss2)->str));
565		if (debug_sort)
566			printf("; cmp2=%d", ret);
567	}
568
569	if (debug_sort)
570		printf("\n");
571
572	return (ret);
573}
574
575/*
576 * Compare two sort list items, according to the sort specs.
577 */
578int
579list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
580    size_t offset)
581{
582	int ret;
583
584	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
585
586	if (debug_sort) {
587		if (offset)
588			printf("; offset=%d", (int) offset);
589		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
590		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
591		printf("; cmp1=%d\n", ret);
592	}
593
594	if (ret)
595		return (ret);
596
597	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
598		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
599		if (debug_sort)
600			printf("; cmp2=%d\n", ret);
601	}
602
603	return (ret);
604}
605
/*
 * Compare two sort list items according to the sort specs, with no
 * offset: equivalent to list_coll_offset(ss1, ss2, 0).
 */
int
list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
{
	const size_t no_offset = 0;

	return (list_coll_offset(ss1, ss2, no_offset));
}
615
/*
 * LSCDEF(OFF) defines list_coll_<OFF>(), a two-argument comparator that
 * forwards to list_coll_offset() with the constant offset OFF.
 */
#define	LSCDEF(OFF)							\
static int								\
list_coll_##OFF(struct sort_list_item **ss1,				\
    struct sort_list_item **ss2)					\
{									\
									\
	return (list_coll_offset(ss1, ss2, OFF));			\
}

/* Instantiate the comparators for offsets 1 through 20. */
LSCDEF(1)
LSCDEF(2)
LSCDEF(3)
LSCDEF(4)
LSCDEF(5)
LSCDEF(6)
LSCDEF(7)
LSCDEF(8)
LSCDEF(9)
LSCDEF(10)
LSCDEF(11)
LSCDEF(12)
LSCDEF(13)
LSCDEF(14)
LSCDEF(15)
LSCDEF(16)
LSCDEF(17)
LSCDEF(18)
LSCDEF(19)
LSCDEF(20)
644
645listcoll_t
646get_list_call_func(size_t offset)
647{
648	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
649	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
650	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
651	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
652	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
653	    list_coll_18, list_coll_19, list_coll_20 };
654
655	if (offset <= 20)
656		return (lsarray[offset]);
657
658	return (list_coll);
659}
660
661/*
662 * Compare two sort list items, only by their original string.
663 */
664int
665list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
666{
667
668	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
669}
670
/*
 * Upper bound on the number of digits kept from each part of a number
 * (the part before and the part after the decimal point).
 */
#define	MAX_NUM_SIZE	128
675
/*
 * Translate a unit suffix character into its rank and store it in *si:
 * k/K = 1, M = 2, G = 3, T = 4, P = 5, E = 6, Z = 7, Y = 8, and 0 for
 * any other character.  Only `k' is accepted in lower case.
 */
static void
setsuffix(wchar_t c, unsigned char *si)
{
	static const wchar_t ranks[] = L"KMGTPEZY";
	const wchar_t key = (c == L'k') ? L'K' : c;
	unsigned char pos;

	*si = 0;
	for (pos = 0; ranks[pos] != L'\0'; pos++) {
		if (ranks[pos] == key) {
			*si = pos + 1;
			break;
		}
	}
}
711
712/*
713 * Read string s and parse the string into a fixed-decimal-point number.
714 * sign equals -1 if the number is negative (explicit plus is not allowed,
715 * according to GNU sort's "info sort".
716 * The number part before decimal point is in the smain, after the decimal
717 * point is in sfrac, tail is the pointer to the remainder of the string.
718 */
719static int
720read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
721{
722	bwstring_iterator s;
723
724	s = bws_begin(s0);
725
726	/* always end the fraction with zero, even if we have no fraction */
727	sfrac[0] = 0;
728
729	while (iswblank(bws_get_iter_value(s)))
730		s = bws_iterator_inc(s, 1);
731
732	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
733		*sign = -1;
734		s = bws_iterator_inc(s, 1);
735	}
736
737	// This is '0', not '\0', do not change this
738	while (iswdigit(bws_get_iter_value(s)) &&
739	    (bws_get_iter_value(s) == L'0'))
740		s = bws_iterator_inc(s, 1);
741
742	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
743		if (iswdigit(bws_get_iter_value(s))) {
744			smain[*main_len] = bws_get_iter_value(s);
745			s = bws_iterator_inc(s, 1);
746			*main_len += 1;
747		} else if (symbol_thousands_sep &&
748		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
749			s = bws_iterator_inc(s, 1);
750		else
751			break;
752	}
753
754	smain[*main_len] = 0;
755
756	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
757		s = bws_iterator_inc(s, 1);
758		while (iswdigit(bws_get_iter_value(s)) &&
759		    *frac_len < MAX_NUM_SIZE) {
760			sfrac[*frac_len] = bws_get_iter_value(s);
761			s = bws_iterator_inc(s, 1);
762			*frac_len += 1;
763		}
764		sfrac[*frac_len] = 0;
765
766		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
767			--(*frac_len);
768			sfrac[*frac_len] = L'\0';
769		}
770	}
771
772	setsuffix(bws_get_iter_value(s),si);
773
774	if ((*main_len + *frac_len) == 0)
775		*sign = 0;
776
777	return (0);
778}
779
780/*
781 * Implements string sort.
782 */
783static int
784wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
785{
786
787	if (debug_sort) {
788		if (offset)
789			printf("; offset=%d\n", (int) offset);
790		bwsprintf(stdout, kv1->k, "; k1=<", ">");
791		printf("(%zu)", BWSLEN(kv1->k));
792		bwsprintf(stdout, kv2->k, ", k2=<", ">");
793		printf("(%zu)", BWSLEN(kv2->k));
794	}
795
796	return (bwscoll(kv1->k, kv2->k, offset));
797}
798
/*
 * Compare two suffix ranks as produced by setsuffix(): negative when the
 * first is smaller, zero when equal, positive when it is larger.
 */
static inline int
cmpsuffix(unsigned char si1, unsigned char si2)
{
	const char lhs = (char)si1;
	const char rhs = (char)si2;

	return (lhs - rhs);
}
808
809/*
810 * Implements numeric sort for -n and -h.
811 */
812static int
813numcoll_impl(struct key_value *kv1, struct key_value *kv2,
814    size_t offset __unused, bool use_suffix)
815{
816	struct bwstring *s1, *s2;
817	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
818	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
819	int cmp_res, sign1, sign2;
820	size_t frac1, frac2, main1, main2;
821	unsigned char SI1, SI2;
822	bool e1, e2, key1_read, key2_read;
823
824	s1 = kv1->k;
825	s2 = kv2->k;
826	sign1 = sign2 = 0;
827	main1 = main2 = 0;
828	frac1 = frac2 = 0;
829
830	key1_read = key2_read = false;
831
832	if (debug_sort) {
833		bwsprintf(stdout, s1, "; k1=<", ">");
834		bwsprintf(stdout, s2, ", k2=<", ">");
835	}
836
837	if (s1 == s2)
838		return (0);
839
840	if (kv1->hint->status == HS_UNINITIALIZED) {
841		/* read the number from the string */
842		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
843		key1_read = true;
844		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
845		if(main1 < 1 && frac1 < 1)
846			kv1->hint->v.nh.empty=true;
847		kv1->hint->v.nh.si = SI1;
848		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
849		    HS_INITIALIZED : HS_ERROR;
850		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
851	}
852
853	if (kv2->hint->status == HS_UNINITIALIZED) {
854		/* read the number from the string */
855		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
856		key2_read = true;
857		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
858		if(main2 < 1 && frac2 < 1)
859			kv2->hint->v.nh.empty=true;
860		kv2->hint->v.nh.si = SI2;
861		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
862		    HS_INITIALIZED : HS_ERROR;
863		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
864	}
865
866	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
867	    HS_INITIALIZED) {
868		unsigned long long n1, n2;
869		bool neg1, neg2;
870
871		e1 = kv1->hint->v.nh.empty;
872		e2 = kv2->hint->v.nh.empty;
873
874		if (e1 && e2)
875			return (0);
876
877		neg1 = kv1->hint->v.nh.neg;
878		neg2 = kv2->hint->v.nh.neg;
879
880		if (neg1 && !neg2)
881			return (-1);
882		if (neg2 && !neg1)
883			return (+1);
884
885		if (e1)
886			return (neg2 ? +1 : -1);
887		else if (e2)
888			return (neg1 ? -1 : +1);
889
890
891		if (use_suffix) {
892			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
893			if (cmp_res)
894				return (neg1 ? -cmp_res : cmp_res);
895		}
896
897		n1 = kv1->hint->v.nh.n1;
898		n2 = kv2->hint->v.nh.n1;
899		if (n1 < n2)
900			return (neg1 ? +1 : -1);
901		else if (n1 > n2)
902			return (neg1 ? -1 : +1);
903	}
904
905	/* read the numbers from the strings */
906	if (!key1_read)
907		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
908	if (!key2_read)
909		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
910
911	e1 = ((main1 + frac1) == 0);
912	e2 = ((main2 + frac2) == 0);
913
914	if (e1 && e2)
915		return (0);
916
917	/* we know the result if the signs are different */
918	if (sign1 < 0 && sign2 >= 0)
919		return (-1);
920	if (sign1 >= 0 && sign2 < 0)
921		return (+1);
922
923	if (e1)
924		return ((sign2 < 0) ? +1 : -1);
925	else if (e2)
926		return ((sign1 < 0) ? -1 : +1);
927
928	if (use_suffix) {
929		cmp_res = cmpsuffix(SI1, SI2);
930		if (cmp_res)
931			return ((sign1 < 0) ? -cmp_res : cmp_res);
932	}
933
934	/* if both numbers are empty assume that the strings are equal */
935	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
936		return (0);
937
938	/*
939	 * if the main part is of different size, we know the result
940	 * (because the leading zeros are removed)
941	 */
942	if (main1 < main2)
943		cmp_res = -1;
944	else if (main1 > main2)
945		cmp_res = +1;
946	/* if the sizes are equal then simple non-collate string compare gives the correct result */
947	else
948		cmp_res = wcscmp(smain1, smain2);
949
950	/* check fraction */
951	if (!cmp_res)
952		cmp_res = wcscmp(sfrac1, sfrac2);
953
954	if (!cmp_res)
955		return (0);
956
957	/* reverse result if the signs are negative */
958	if (sign1 < 0 && sign2 < 0)
959		cmp_res = -cmp_res;
960
961	return (cmp_res);
962}
963
/*
 * Numeric sort (-n): numcoll_impl() with unit suffixes ignored.
 */
static int
numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
{
	const bool use_suffix = false;

	return (numcoll_impl(kv1, kv2, offset, use_suffix));
}
973
/*
 * 'Human' numeric sort (-h): numcoll_impl() with unit suffixes compared.
 */
static int
hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
{
	const bool use_suffix = true;

	return (numcoll_impl(kv1, kv2, offset, use_suffix));
}
983
984/* Use hint space to memoize md5 computations, at least. */
985static void
986randomcoll_init_hint(struct key_value *kv, void *hash)
987{
988
989	memcpy(kv->hint->v.Rh.cached, hash, sizeof(kv->hint->v.Rh.cached));
990	kv->hint->status = HS_INITIALIZED;
991}
992
993/*
994 * Implements random sort (-R).
995 */
996static int
997randomcoll(struct key_value *kv1, struct key_value *kv2,
998    size_t offset __unused)
999{
1000	struct bwstring *s1, *s2;
1001	MD5_CTX ctx1, ctx2;
1002	unsigned char hash1[MD5_DIGEST_LENGTH], hash2[MD5_DIGEST_LENGTH];
1003	int cmp;
1004
1005	s1 = kv1->k;
1006	s2 = kv2->k;
1007
1008	if (debug_sort) {
1009		bwsprintf(stdout, s1, "; k1=<", ">");
1010		bwsprintf(stdout, s2, ", k2=<", ">");
1011	}
1012
1013	if (s1 == s2)
1014		return (0);
1015
1016	if (kv1->hint->status == HS_INITIALIZED &&
1017	    kv2->hint->status == HS_INITIALIZED) {
1018		cmp = memcmp(kv1->hint->v.Rh.cached,
1019		    kv2->hint->v.Rh.cached, sizeof(kv1->hint->v.Rh.cached));
1020		if (cmp != 0)
1021			return (cmp);
1022	}
1023
1024	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
1025	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
1026
1027	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1028	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1029
1030	MD5Final(hash1, &ctx1);
1031	MD5Final(hash2, &ctx2);
1032
1033	if (kv1->hint->status == HS_UNINITIALIZED)
1034		randomcoll_init_hint(kv1, hash1);
1035	if (kv2->hint->status == HS_UNINITIALIZED)
1036		randomcoll_init_hint(kv2, hash2);
1037
1038	return (memcmp(hash1, hash2, sizeof(hash1)));
1039}
1040
1041/*
1042 * Implements version sort (-V).
1043 */
1044static int
1045versioncoll(struct key_value *kv1, struct key_value *kv2,
1046    size_t offset __unused)
1047{
1048	struct bwstring *s1, *s2;
1049
1050	s1 = kv1->k;
1051	s2 = kv2->k;
1052
1053	if (debug_sort) {
1054		bwsprintf(stdout, s1, "; k1=<", ">");
1055		bwsprintf(stdout, s2, ", k2=<", ">");
1056	}
1057
1058	if (s1 == s2)
1059		return (0);
1060
1061	return (vcmp(s1, s2));
1062}
1063
/*
 * True when a conversion reported ERANGE and produced one of the negative
 * "huge" values, i.e. it overflowed towards minus infinity.
 */
static inline bool
huge_minus(double d, int err1)
{
	const bool out_of_range = (err1 == ERANGE);

	return (out_of_range &&
	    (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL));
}
1077
/*
 * True when a conversion reported ERANGE and produced one of the positive
 * "huge" values, i.e. it overflowed towards plus infinity.
 */
static inline bool
huge_plus(double d, int err1)
{
	const bool out_of_range = (err1 == ERANGE);

	return (out_of_range &&
	    (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL));
}
1091
/*
 * Check whether a value is a NaN.
 *
 * Only isnan() is used: a NaN never compares equal to anything, itself
 * included, so an equality test against NAN could never succeed.
 */
static bool
is_nan(double d)
{

	return (isnan(d) != 0);
}
1101
/*
 * Three-way comparison used to order two NaN keys: -1 if d1 < d2, +1 if
 * d1 > d2, otherwise 0.  Ordered comparisons involving a NaN are false,
 * so the result is 0 whenever either argument is a NaN.
 */
static int
cmp_nans(double d1, double d2)
{

	return ((d1 > d2) - (d1 < d2));
}
1115
1116/*
1117 * Implements general numeric sort (-g).
1118 */
1119static int
1120gnumcoll(struct key_value *kv1, struct key_value *kv2,
1121    size_t offset __unused)
1122{
1123	double d1, d2;
1124	int err1, err2;
1125	bool empty1, empty2, key1_read, key2_read;
1126
1127	d1 = d2 = 0;
1128	err1 = err2 = 0;
1129	key1_read = key2_read = false;
1130
1131	if (debug_sort) {
1132		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1133		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1134	}
1135
1136	if (kv1->hint->status == HS_UNINITIALIZED) {
1137		errno = 0;
1138		d1 = bwstod(kv1->k, &empty1);
1139		err1 = errno;
1140
1141		if (empty1)
1142			kv1->hint->v.gh.notnum = true;
1143		else if (err1 == 0) {
1144			kv1->hint->v.gh.d = d1;
1145			kv1->hint->v.gh.nan = is_nan(d1);
1146			kv1->hint->status = HS_INITIALIZED;
1147		} else
1148			kv1->hint->status = HS_ERROR;
1149
1150		key1_read = true;
1151	}
1152
1153	if (kv2->hint->status == HS_UNINITIALIZED) {
1154		errno = 0;
1155		d2 = bwstod(kv2->k, &empty2);
1156		err2 = errno;
1157
1158		if (empty2)
1159			kv2->hint->v.gh.notnum = true;
1160		else if (err2 == 0) {
1161			kv2->hint->v.gh.d = d2;
1162			kv2->hint->v.gh.nan = is_nan(d2);
1163			kv2->hint->status = HS_INITIALIZED;
1164		} else
1165			kv2->hint->status = HS_ERROR;
1166
1167		key2_read = true;
1168	}
1169
1170	if (kv1->hint->status == HS_INITIALIZED &&
1171	    kv2->hint->status == HS_INITIALIZED) {
1172		if (kv1->hint->v.gh.notnum)
1173			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1174		else if (kv2->hint->v.gh.notnum)
1175			return (+1);
1176
1177		if (kv1->hint->v.gh.nan)
1178			return ((kv2->hint->v.gh.nan) ?
1179			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1180			    -1);
1181		else if (kv2->hint->v.gh.nan)
1182			return (+1);
1183
1184		d1 = kv1->hint->v.gh.d;
1185		d2 = kv2->hint->v.gh.d;
1186
1187		if (d1 < d2)
1188			return (-1);
1189		else if (d1 > d2)
1190			return (+1);
1191		else
1192			return (0);
1193	}
1194
1195	if (!key1_read) {
1196		errno = 0;
1197		d1 = bwstod(kv1->k, &empty1);
1198		err1 = errno;
1199	}
1200
1201	if (!key2_read) {
1202		errno = 0;
1203		d2 = bwstod(kv2->k, &empty2);
1204		err2 = errno;
1205	}
1206
1207	/* Non-value case: */
1208	if (empty1)
1209		return (empty2 ? 0 : -1);
1210	else if (empty2)
1211		return (+1);
1212
1213	/* NAN case */
1214	if (is_nan(d1))
1215		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1216	else if (is_nan(d2))
1217		return (+1);
1218
1219	/* Infinities */
1220	if (err1 == ERANGE || err2 == ERANGE) {
1221		/* Minus infinity case */
1222		if (huge_minus(d1, err1)) {
1223			if (huge_minus(d2, err2)) {
1224				if (d1 < d2)
1225					return (-1);
1226				if (d1 > d2)
1227					return (+1);
1228				return (0);
1229			} else
1230				return (-1);
1231
1232		} else if (huge_minus(d2, err2)) {
1233			if (huge_minus(d1, err1)) {
1234				if (d1 < d2)
1235					return (-1);
1236				if (d1 > d2)
1237					return (+1);
1238				return (0);
1239			} else
1240				return (+1);
1241		}
1242
1243		/* Plus infinity case */
1244		if (huge_plus(d1, err1)) {
1245			if (huge_plus(d2, err2)) {
1246				if (d1 < d2)
1247					return (-1);
1248				if (d1 > d2)
1249					return (+1);
1250				return (0);
1251			} else
1252				return (+1);
1253		} else if (huge_plus(d2, err2)) {
1254			if (huge_plus(d1, err1)) {
1255				if (d1 < d2)
1256					return (-1);
1257				if (d1 > d2)
1258					return (+1);
1259				return (0);
1260			} else
1261				return (-1);
1262		}
1263	}
1264
1265	if (d1 < d2)
1266		return (-1);
1267	if (d1 > d2)
1268		return (+1);
1269
1270	return (0);
1271}
1272
1273/*
1274 * Implements month sort (-M).
1275 */
1276static int
1277monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1278{
1279	int val1, val2;
1280	bool key1_read, key2_read;
1281
1282	val1 = val2 = 0;
1283	key1_read = key2_read = false;
1284
1285	if (debug_sort) {
1286		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1287		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1288	}
1289
1290	if (kv1->hint->status == HS_UNINITIALIZED) {
1291		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1292		key1_read = true;
1293		kv1->hint->status = HS_INITIALIZED;
1294	}
1295
1296	if (kv2->hint->status == HS_UNINITIALIZED) {
1297		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1298		key2_read = true;
1299		kv2->hint->status = HS_INITIALIZED;
1300	}
1301
1302	if (kv1->hint->status == HS_INITIALIZED) {
1303		val1 = kv1->hint->v.Mh.m;
1304		key1_read = true;
1305	}
1306
1307	if (kv2->hint->status == HS_INITIALIZED) {
1308		val2 = kv2->hint->v.Mh.m;
1309		key2_read = true;
1310	}
1311
1312	if (!key1_read)
1313		val1 = bws_month_score(kv1->k);
1314	if (!key2_read)
1315		val2 = bws_month_score(kv2->k);
1316
1317	if (val1 == val2) {
1318		return (0);
1319	}
1320	if (val1 < val2)
1321		return (-1);
1322	return (+1);
1323}
1324