1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include <errno.h>
34#include <err.h>
35#include <langinfo.h>
36#include <math.h>
37#if defined(SORT_THREADS)
38#include <pthread.h>
39#include <semaphore.h>
40#endif
41#include <stdlib.h>
42#include <string.h>
43#include <wchar.h>
44#include <wctype.h>
45#include <unistd.h>
46
47#include "coll.h"
48#include "radixsort.h"
49
/* Comparison sort used when radix partitioning stops paying off. */
#define DEFAULT_SORT_FUNC_RADIXSORT mergesort

/* Nodes this small are cheaper to finish with a comparison sort. */
#define TINY_NODE(sl) ((sl)->tosort_num < 65)
/* Nodes this small are processed inline instead of being queued. */
#define SMALL_NODE(sl) ((sl)->tosort_num < 5)

/* are we sorting in reverse order ? */
static bool reverse_sort;

/* sort sub-levels array size (256 slots: one per possible byte value) */
static const size_t slsz = 256 * sizeof(struct sort_level*);

/* one sort level structure */
struct sort_level
{
	struct sort_level	**sublevels;	/* fan-out: child per next-byte value */
	struct sort_list_item	**leaves;	/* items fully consumed at this level */
	struct sort_list_item	**sorted;	/* shared output array */
	struct sort_list_item	**tosort;	/* items still to be partitioned */
	size_t			  leaves_num;	/* used entries in leaves */
	size_t			  leaves_sz;	/* allocated entries in leaves */
	size_t			  level;	/* byte depth of this node */
	size_t			  real_sln;	/* count of non-NULL sublevels */
	size_t			  start_position; /* output offset into sorted */
	size_t			  sln;		/* size of sublevels array */
	size_t			  tosort_num;	/* used entries in tosort */
	size_t			  tosort_sz;	/* allocated entries in tosort */
};

/* stack of sort levels ready to be sorted */
struct level_stack {
	struct level_stack	 *next;
	struct sort_level	 *sl;
};

/* head of the (shared, in MT mode) stack of pending levels */
static struct level_stack *g_ls;
85
#if defined(SORT_THREADS)
/* stack guarding mutex */
static pthread_cond_t g_ls_cond;
static pthread_mutex_t g_ls_mutex;

/* counter: how many items are left */
static size_t sort_left;
/* guarding mutex */

/* semaphore to count threads */
static sem_t mtsem;

/*
 * Decrement items counter
 */
static inline void
sort_left_dec(size_t n)
{
	pthread_mutex_lock(&g_ls_mutex);
	sort_left -= n;
	/* Last items done: wake workers blocked in pop_ls_mt() so they exit. */
	if (sort_left == 0 && nthreads > 1)
		pthread_cond_broadcast(&g_ls_cond);
	pthread_mutex_unlock(&g_ls_mutex);
}

/*
 * Do we have something to sort ?
 *
 * This routine does not need to be locked.
 */
static inline bool
have_sort_left(void)
{
	bool ret;

	ret = (sort_left > 0);

	return (ret);
}

#else

/* Single-threaded build: no counter bookkeeping needed. */
#define sort_left_dec(n)

#endif /* SORT_THREADS */
131
132static void
133_push_ls(struct level_stack *ls)
134{
135
136	ls->next = g_ls;
137	g_ls = ls;
138}
139
140/*
141 * Push sort level to the stack
142 */
143static inline void
144push_ls(struct sort_level *sl)
145{
146	struct level_stack *new_ls;
147
148	new_ls = sort_malloc(sizeof(struct level_stack));
149	new_ls->sl = sl;
150
151#if defined(SORT_THREADS)
152	if (nthreads > 1) {
153		pthread_mutex_lock(&g_ls_mutex);
154		_push_ls(new_ls);
155		pthread_cond_signal(&g_ls_cond);
156		pthread_mutex_unlock(&g_ls_mutex);
157	} else
158#endif
159		_push_ls(new_ls);
160}
161
162/*
163 * Pop sort level from the stack (single-threaded style)
164 */
165static inline struct sort_level*
166pop_ls_st(void)
167{
168	struct sort_level *sl;
169
170	if (g_ls) {
171		struct level_stack *saved_ls;
172
173		sl = g_ls->sl;
174		saved_ls = g_ls;
175		g_ls = g_ls->next;
176		sort_free(saved_ls);
177	} else
178		sl = NULL;
179
180	return (sl);
181}
182
#if defined(SORT_THREADS)

/*
 * Pop sort level from the stack (multi-threaded style)
 *
 * Blocks until either a level is available or nothing remains to be
 * sorted; returns NULL in the latter case so the worker can exit.
 */
static inline struct sort_level*
pop_ls_mt(void)
{
	struct level_stack *saved_ls;
	struct sort_level *sl;

	pthread_mutex_lock(&g_ls_mutex);

	for (;;) {
		if (g_ls) {
			sl = g_ls->sl;
			saved_ls = g_ls;
			g_ls = g_ls->next;
			break;
		}
		sl = NULL;
		saved_ls = NULL;

		/* Stack empty and all items accounted for: give up. */
		if (have_sort_left() == 0)
			break;
		/* Wait for push_ls() or sort_left_dec() to signal us. */
		pthread_cond_wait(&g_ls_cond, &g_ls_mutex);
	}

	pthread_mutex_unlock(&g_ls_mutex);

	/* saved_ls may be NULL here; sort_free() tolerates that. */
	sort_free(saved_ls);

	return (sl);
}

#endif /* defined(SORT_THREADS) */
219
220static void
221add_to_sublevel(struct sort_level *sl, struct sort_list_item *item, size_t indx)
222{
223	struct sort_level *ssl;
224
225	ssl = sl->sublevels[indx];
226
227	if (ssl == NULL) {
228		ssl = sort_malloc(sizeof(struct sort_level));
229		memset(ssl, 0, sizeof(struct sort_level));
230
231		ssl->level = sl->level + 1;
232		sl->sublevels[indx] = ssl;
233
234		++(sl->real_sln);
235	}
236
237	if (++(ssl->tosort_num) > ssl->tosort_sz) {
238		ssl->tosort_sz = ssl->tosort_num + 128;
239		ssl->tosort = sort_realloc(ssl->tosort,
240		    sizeof(struct sort_list_item*) * (ssl->tosort_sz));
241	}
242
243	ssl->tosort[ssl->tosort_num - 1] = item;
244}
245
246static inline void
247add_leaf(struct sort_level *sl, struct sort_list_item *item)
248{
249
250	if (++(sl->leaves_num) > sl->leaves_sz) {
251		sl->leaves_sz = sl->leaves_num + 128;
252		sl->leaves = sort_realloc(sl->leaves,
253		    (sizeof(struct sort_list_item*) * (sl->leaves_sz)));
254	}
255	sl->leaves[sl->leaves_num - 1] = item;
256}
257
258static inline int
259get_wc_index(struct sort_list_item *sli, size_t level)
260{
261	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
262	const struct key_value *kv;
263	const struct bwstring *bws;
264
265	kv = get_key_from_keys_array(&sli->ka, 0);
266	bws = kv->k;
267
268	if ((BWSLEN(bws) * wcfact > level)) {
269		wchar_t res;
270
271		/*
272		 * Sort wchar strings a byte at a time, rather than a single
273		 * byte from each wchar.
274		 */
275		res = (wchar_t)BWS_GET(bws, level / wcfact);
276		/* Sort most-significant byte first. */
277		if (level % wcfact < wcfact - 1)
278			res = (res >> (8 * (wcfact - 1 - (level % wcfact))));
279
280		return (res & 0xff);
281	}
282
283	return (-1);
284}
285
286static void
287place_item(struct sort_level *sl, size_t item)
288{
289	struct sort_list_item *sli;
290	int c;
291
292	sli = sl->tosort[item];
293	c = get_wc_index(sli, sl->level);
294
295	if (c == -1)
296		add_leaf(sl, sli);
297	else
298		add_to_sublevel(sl, sli, c);
299}
300
301static void
302free_sort_level(struct sort_level *sl)
303{
304
305	if (sl) {
306		if (sl->leaves)
307			sort_free(sl->leaves);
308
309		if (sl->level > 0)
310			sort_free(sl->tosort);
311
312		if (sl->sublevels) {
313			struct sort_level *slc;
314			size_t sln;
315
316			sln = sl->sln;
317
318			for (size_t i = 0; i < sln; ++i) {
319				slc = sl->sublevels[i];
320				if (slc)
321					free_sort_level(slc);
322			}
323
324			sort_free(sl->sublevels);
325		}
326
327		sort_free(sl);
328	}
329}
330
/*
 * Sort one level: handle trivial sizes directly, fall back to a
 * comparison sort for small/deep nodes, otherwise partition the items
 * by their next key byte into 256 sub-levels and queue (or recurse
 * into) each one.  Results land in sl->sorted at sl->start_position.
 * The level and everything still hanging off it is freed on exit.
 */
static void
run_sort_level_next(struct sort_level *sl)
{
	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
	struct sort_level *slc;
	size_t i, sln, tosort_num;

	/* Stale fan-out array from a previous pass, if any. */
	if (sl->sublevels) {
		sort_free(sl->sublevels);
		sl->sublevels = NULL;
	}

	switch (sl->tosort_num) {
	case 0:
		goto end;
	case (1):
		/* Single item: emit it directly. */
		sl->sorted[sl->start_position] = sl->tosort[0];
		sort_left_dec(1);
		goto end;
	case (2):
		/*
		 * Radixsort only processes a single byte at a time.  In wchar
		 * mode, this can be a subset of the length of a character.
		 * list_coll_offset() offset is in units of wchar, not bytes.
		 * So to calculate the offset, we must divide by
		 * sizeof(wchar_t) and round down to the index of the first
		 * character this level references.
		 */
		if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
		    sl->level / wcfact) > 0) {
			sl->sorted[sl->start_position++] = sl->tosort[1];
			sl->sorted[sl->start_position] = sl->tosort[0];
		} else {
			sl->sorted[sl->start_position++] = sl->tosort[0];
			sl->sorted[sl->start_position] = sl->tosort[1];
		}
		sort_left_dec(2);

		goto end;
	default:
		/* Small node or deep level: comparison sort is cheaper. */
		if (TINY_NODE(sl) || (sl->level > 15)) {
			listcoll_t func;

			/*
			 * Collate comparison offset is in units of
			 * character-width, so we must divide the level (bytes)
			 * by operating character width (wchar_t or char).  See
			 * longer comment above.
			 */
			func = get_list_call_func(sl->level / wcfact);

			/* Repurpose tosort as the leaves array, shrunk to fit. */
			sl->leaves = sl->tosort;
			sl->leaves_num = sl->tosort_num;
			sl->leaves_sz = sl->leaves_num;
			sl->leaves = sort_realloc(sl->leaves,
			    (sizeof(struct sort_list_item *) *
			    (sl->leaves_sz)));
			sl->tosort = NULL;
			sl->tosort_num = 0;
			sl->tosort_sz = 0;
			sl->sln = 0;
			sl->real_sln = 0;
			if (sort_opts_vals.sflag) {
				/* Stable sort requested: must use mergesort. */
				if (mergesort(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) func) == -1)
					/* NOTREACHED */
					err(2, "Radix sort error 3");
			} else
				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) func);

			memcpy(sl->sorted + sl->start_position,
			    sl->leaves, sl->leaves_num *
			    sizeof(struct sort_list_item*));

			sort_left_dec(sl->leaves_num);

			goto end;
		} else {
			/* Shrink tosort to exact size before partitioning. */
			sl->tosort_sz = sl->tosort_num;
			sl->tosort = sort_realloc(sl->tosort,
			    sizeof(struct sort_list_item*) * (sl->tosort_sz));
		}
	}

	/* Partition: one sub-level slot per possible next-byte value. */
	sl->sln = 256;
	sl->sublevels = sort_malloc(slsz);
	memset(sl->sublevels, 0, slsz);

	sl->real_sln = 0;

	tosort_num = sl->tosort_num;
	for (i = 0; i < tosort_num; ++i)
		place_item(sl, i);

	sort_free(sl->tosort);
	sl->tosort = NULL;
	sl->tosort_num = 0;
	sl->tosort_sz = 0;

	/*
	 * Order the leaves (items exhausted at this level); they sort
	 * before all sub-level items in forward order.
	 */
	if (sl->leaves_num > 1) {
		if (keys_num > 1) {
			if (sort_opts_vals.sflag) {
				mergesort(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) list_coll);
			} else {
				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) list_coll);
			}
		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
			    sizeof(struct sort_list_item *),
			    (int(*)(const void *, const void *)) list_coll_by_str_only);
		}
	}

	sl->leaves_sz = sl->leaves_num;
	sl->leaves = sort_realloc(sl->leaves, (sizeof(struct sort_list_item *) *
	    (sl->leaves_sz)));

	if (!reverse_sort) {
		/* Leaves first, then sub-levels in ascending byte order. */
		memcpy(sl->sorted + sl->start_position, sl->leaves,
		    sl->leaves_num * sizeof(struct sort_list_item*));
		sl->start_position += sl->leaves_num;
		sort_left_dec(sl->leaves_num);

		sort_free(sl->leaves);
		sl->leaves = NULL;
		sl->leaves_num = 0;
		sl->leaves_sz = 0;

		sln = sl->sln;

		for (i = 0; i < sln; ++i) {
			slc = sl->sublevels[i];

			if (slc) {
				/* Assign each child its output window. */
				slc->sorted = sl->sorted;
				slc->start_position = sl->start_position;
				sl->start_position += slc->tosort_num;
				/* Tiny children are cheaper to do inline. */
				if (SMALL_NODE(slc))
					run_sort_level_next(slc);
				else
					push_ls(slc);
				/* Ownership transferred: don't double-free. */
				sl->sublevels[i] = NULL;
			}
		}

	} else {
		size_t n;

		/* Reverse order: sub-levels high-to-low, leaves last. */
		sln = sl->sln;

		for (i = 0; i < sln; ++i) {
			n = sln - i - 1;
			slc = sl->sublevels[n];

			if (slc) {
				slc->sorted = sl->sorted;
				slc->start_position = sl->start_position;
				sl->start_position += slc->tosort_num;
				if (SMALL_NODE(slc))
					run_sort_level_next(slc);
				else
					push_ls(slc);
				sl->sublevels[n] = NULL;
			}
		}

		memcpy(sl->sorted + sl->start_position, sl->leaves,
		    sl->leaves_num * sizeof(struct sort_list_item*));
		sort_left_dec(sl->leaves_num);
	}

end:
	free_sort_level(sl);
}
512
513/*
514 * Single-threaded sort cycle
515 */
516static void
517run_sort_cycle_st(void)
518{
519	struct sort_level *slc;
520
521	for (;;) {
522		slc = pop_ls_st();
523		if (slc == NULL) {
524			break;
525		}
526		run_sort_level_next(slc);
527	}
528}
529
#if defined(SORT_THREADS)

/*
 * Multi-threaded sort cycle
 *
 * Drains the shared stack; pop_ls_mt() blocks until work arrives or
 * everything has been sorted, returning NULL in the latter case.
 */
static void
run_sort_cycle_mt(void)
{
	struct sort_level *slc;

	for (;;) {
		slc = pop_ls_mt();
		if (slc == NULL)
			break;
		run_sort_level_next(slc);
	}
}

/*
 * Sort cycle thread (in multi-threaded mode)
 *
 * Posts mtsem on exit so run_top_sort_level() can wait for workers.
 */
static void*
sort_thread(void* arg)
{
	run_sort_cycle_mt();
	sem_post(&mtsem);

	return (arg);
}

#endif /* defined(SORT_THREADS) */
561
562static void
563run_top_sort_level(struct sort_level *sl)
564{
565	struct sort_level *slc;
566
567	reverse_sort = sort_opts_vals.kflag ? keys[0].sm.rflag :
568	    default_sort_mods->rflag;
569
570	sl->start_position = 0;
571	sl->sln = 256;
572	sl->sublevels = sort_malloc(slsz);
573	memset(sl->sublevels, 0, slsz);
574
575	for (size_t i = 0; i < sl->tosort_num; ++i)
576		place_item(sl, i);
577
578	if (sl->leaves_num > 1) {
579		if (keys_num > 1) {
580			if (sort_opts_vals.sflag) {
581				mergesort(sl->leaves, sl->leaves_num,
582				    sizeof(struct sort_list_item *),
583				    (int(*)(const void *, const void *)) list_coll);
584			} else {
585				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
586				    sizeof(struct sort_list_item *),
587				    (int(*)(const void *, const void *)) list_coll);
588			}
589		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
590			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
591			    sizeof(struct sort_list_item *),
592			    (int(*)(const void *, const void *)) list_coll_by_str_only);
593		}
594	}
595
596	if (!reverse_sort) {
597		memcpy(sl->tosort + sl->start_position, sl->leaves,
598		    sl->leaves_num * sizeof(struct sort_list_item*));
599		sl->start_position += sl->leaves_num;
600		sort_left_dec(sl->leaves_num);
601
602		for (size_t i = 0; i < sl->sln; ++i) {
603			slc = sl->sublevels[i];
604
605			if (slc) {
606				slc->sorted = sl->tosort;
607				slc->start_position = sl->start_position;
608				sl->start_position += slc->tosort_num;
609				push_ls(slc);
610				sl->sublevels[i] = NULL;
611			}
612		}
613
614	} else {
615		size_t n;
616
617		for (size_t i = 0; i < sl->sln; ++i) {
618
619			n = sl->sln - i - 1;
620			slc = sl->sublevels[n];
621
622			if (slc) {
623				slc->sorted = sl->tosort;
624				slc->start_position = sl->start_position;
625				sl->start_position += slc->tosort_num;
626				push_ls(slc);
627				sl->sublevels[n] = NULL;
628			}
629		}
630
631		memcpy(sl->tosort + sl->start_position, sl->leaves,
632		    sl->leaves_num * sizeof(struct sort_list_item*));
633
634		sort_left_dec(sl->leaves_num);
635	}
636
637#if defined(SORT_THREADS)
638	if (nthreads < 2) {
639#endif
640		run_sort_cycle_st();
641#if defined(SORT_THREADS)
642	} else {
643		size_t i;
644
645		for(i = 0; i < nthreads; ++i) {
646			pthread_attr_t attr;
647			pthread_t pth;
648
649			pthread_attr_init(&attr);
650			pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
651
652			for (;;) {
653				int res = pthread_create(&pth, &attr,
654				    sort_thread, NULL);
655				if (res >= 0)
656					break;
657				if (errno == EAGAIN) {
658					pthread_yield();
659					continue;
660				}
661				err(2, NULL);
662			}
663
664			pthread_attr_destroy(&attr);
665		}
666
667		for (i = 0; i < nthreads; ++i)
668			sem_wait(&mtsem);
669	}
670#endif /* defined(SORT_THREADS) */
671}
672
/*
 * Set up the radix-sort machinery, run the top-level pass, and tear
 * everything down again.
 *
 * In threaded builds the thread count is temporarily forced to 1 for
 * inputs below MT_SORT_THRESHOLD and restored before returning.
 */
static void
run_sort(struct sort_list_item **base, size_t nmemb)
{
	struct sort_level *sl;

#if defined(SORT_THREADS)
	size_t nthreads_save = nthreads;
	if (nmemb < MT_SORT_THRESHOLD)
		nthreads = 1;

	if (nthreads > 1) {
		pthread_mutexattr_t mattr;

		pthread_mutexattr_init(&mattr);
		/*
		 * NOTE(review): PTHREAD_MUTEX_ADAPTIVE_NP is a non-portable
		 * mutex type — confirm availability when porting.
		 */
		pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_ADAPTIVE_NP);

		pthread_mutex_init(&g_ls_mutex, &mattr);
		pthread_cond_init(&g_ls_cond, NULL);

		pthread_mutexattr_destroy(&mattr);

		sem_init(&mtsem, 0, 0);

	}
#endif

	sl = sort_malloc(sizeof(struct sort_level));
	memset(sl, 0, sizeof(struct sort_level));

	/* Top level sorts the caller's array in place. */
	sl->tosort = base;
	sl->tosort_num = nmemb;
	sl->tosort_sz = nmemb;

#if defined(SORT_THREADS)
	sort_left = nmemb;
#endif

	run_top_sort_level(sl);

	free_sort_level(sl);

#if defined(SORT_THREADS)
	if (nthreads > 1) {
		sem_destroy(&mtsem);
		pthread_mutex_destroy(&g_ls_mutex);
		/*
		 * NOTE(review): g_ls_cond is never destroyed here — confirm
		 * whether pthread_cond_destroy() was omitted intentionally.
		 */
	}
	nthreads = nthreads_save;
#endif
}
722
/*
 * Public entry point: radix-sort an array of list items in place.
 */
void
rxsort(struct sort_list_item **base, size_t nmemb)
{

	run_sort(base, nmemb);
}
729