/****************************************************************************/
/*-
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

31#if defined(LIBC_SCCS) && !defined(lint)
32static char sccsid[] = "@(#)qsort.c	8.1 (Berkeley) 6/4/93";
33#endif /* LIBC_SCCS and not lint */
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");
36
#include <stdlib.h>
#include <pthread.h>
#include <dispatch/dispatch.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <libkern/OSAtomic.h>
#include <sys/mman.h>
#include <errno.h>
#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>

/*
 * Comparison callback type.  The _r variant (I_AM_PSORT_R) receives the
 * caller's thunk as its first argument; the other variants take only the
 * two elements being compared.
 */
#ifdef I_AM_PSORT_R
typedef int		 cmp_t(void *, const void *, const void *);
#else
typedef int		 cmp_t(const void *, const void *);
#endif

/*
 * Forward declarations of the inlined helpers.  The _b variant
 * (I_AM_PSORT_B) takes the comparator as a block (cmp_t ^) instead of a
 * function pointer.
 */
#ifdef I_AM_PSORT_B
static inline char	*med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
#else
static inline char	*med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
#endif
static inline void	 swapfunc(char *, char *, int, int) __attribute__((always_inline));

/*
 * Smaller of two values.  Arguments and the whole expansion are
 * parenthesized so the macro composes safely inside larger expressions;
 * note that each argument may still be evaluated twice.
 */
#define min(a, b)	((a) < (b) ? (a) : (b))

/* Size in bytes of each anonymous mapping used to hold union args records. */
#define PAGESIZE		4096
/* Number of union args records that fit in one page after the page header. */
#define NARGS			((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
/* Arrays with fewer elements than this are handed straight to qsort. */
#define PARALLEL_MIN_SIZE	2000	/* determine heuristically */

struct shared; /* forward reference */

/*
 * Argument record for one sort task.  While a record sits on the free
 * list only `next` is meaningful; while it is in use it carries the
 * parameters of one _psort() invocation.
 */
union args {
    union args *next;		/* free-list link */
    struct {
	struct shared *shared;	/* state common to the whole sort */
	void *a;		/* base of the partition to sort */
	size_t n;		/* number of elements in the partition */
	int depth_limit;	/* remaining quicksort depth budget */
    } /* anonymous */;
};

77struct page {
78    struct page *next;
79    union args args[0];
80};
81
82struct shared {
83    char *who;
84    union args *freelist;
85    struct page *pagelist;
86#ifdef I_AM_PSORT_R
87    void *thunk;
88#endif
89#ifdef I_AM_PSORT_B
90    cmp_t ^cmp;
91#else
92    cmp_t *cmp;
93#endif
94    size_t es;
95    size_t turnoff;
96    dispatch_queue_t queue;
97    dispatch_group_t group;
98    OSSpinLock sharedlock;
99};
100
101static union args *
102getargs(struct shared *shared)
103{
104    union args *args;
105
106    OSSpinLockLock(&shared->sharedlock);
107    if(!shared->freelist) {
108	struct page *page;
109	union args *prev;
110	int i;
111	if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == NULL)
112	    return NULL;
113	page->next = shared->pagelist;
114	shared->pagelist = page;
115	prev = NULL;
116	for(args = page->args, i = NARGS; i > 0; args++, i--) {
117	    args->next = prev;
118	    prev = args;
119	}
120	shared->freelist = prev;
121    }
122    args = shared->freelist;
123    shared->freelist = args->next;
124    OSSpinLockUnlock(&shared->sharedlock);
125    return args;
126}
127
128static void
129returnargs(struct shared *shared, union args *args)
130{
131    OSSpinLockLock(&shared->sharedlock);
132    args->next = shared->freelist;
133    shared->freelist = args;
134    OSSpinLockUnlock(&shared->sharedlock);
135}
136
/*
 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 */

/*
 * Exchange n bytes between parmi and parmj, moving one TYPE at a time.
 * The loop body runs at least once, so n must be at least sizeof(TYPE).
 *
 * The expansion is a bare brace block with no trailing semicolon:
 * swapfunc() places an invocation directly before `else`, so it cannot be
 * turned into a do { } while (0) macro without changing that caller.
 */
#define swapcode(TYPE, parmi, parmj, n) { 		\
	long i = (n) / sizeof (TYPE); 			\
	TYPE *pi = (TYPE *) (parmi); 			\
	TYPE *pj = (TYPE *) (parmj); 			\
	do { 						\
		TYPE	t = *pi;			\
		*pi++ = *pj;				\
		*pj++ = t;				\
	} while (--i > 0);				\
}

/*
 * Choose how elements are exchanged and store the choice in the caller's
 * local `swaptype`:
 *   0 - base and element size are long-aligned and es == sizeof(long):
 *       swap a single long inline;
 *   1 - long-aligned and es is a multiple of sizeof(long): swap by longs;
 *   2 - anything else: swap byte by byte.
 *
 * Arguments are parenthesized, alignment is tested through uintptr_t
 * rather than by subtracting a null pointer, and the expansion carries no
 * trailing semicolon (the call site supplies it).
 */
#define SWAPINIT(a, es)							\
	swaptype = ((uintptr_t)(a) % sizeof(long) != 0 ||		\
	    (es) % sizeof(long) != 0) ? 2 :				\
	    ((es) == sizeof(long) ? 0 : 1)

154static inline void
155swapfunc(a, b, n, swaptype)
156	char *a, *b;
157	int n, swaptype;
158{
159	if(swaptype <= 1)
160		swapcode(long, a, b, n)
161	else
162		swapcode(char, a, b, n)
163}
164
/*
 * Exchange the two elements at a and b.  Uses the enclosing function's
 * `swaptype` and `es`; swaptype 0 swaps a single long inline, every other
 * case goes through swapfunc().  Wrapped in do { } while (0) so it behaves
 * as one statement.
 */
#define swap(a, b)					\
	do {						\
		if (swaptype == 0) {			\
			long t = *(long *)(a);		\
			*(long *)(a) = *(long *)(b);	\
			*(long *)(b) = t;		\
		} else {				\
			swapfunc((a), (b), es, swaptype);	\
		}					\
	} while (0)

/*
 * Exchange n bytes between a and b; does nothing when n is 0.  Wrapped in
 * do { } while (0) so a following `else` cannot bind to the internal if.
 */
#define vecswap(a, b, n)				\
	do {						\
		if ((n) > 0)				\
			swapfunc((a), (b), (n), swaptype);	\
	} while (0)

/*
 * Invoke the comparator named `cmp` in the enclosing scope.  Only the _r
 * variant forwards the thunk; the other variants ignore the first argument.
 */
#ifdef I_AM_PSORT_R
#define	CMP(t, x, y) (cmp((t), (x), (y)))
#else
#define	CMP(t, x, y) (cmp((x), (y)))
#endif

181static inline char *
182med3(char *a, char *b, char *c,
183#ifdef I_AM_PSORT_B
184cmp_t ^cmp,
185#else
186cmp_t *cmp,
187#endif
188void *thunk
189#ifndef I_AM_PSORT_R
190__unused
191#endif
192)
193{
194	return CMP(thunk, a, b) < 0 ?
195	       (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
196              :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
197}
198
/*
 * Initial quicksort depth budget for x elements: twice the index of the
 * highest set bit of x.  _psort() switches to heapsort once the budget is
 * used up.
 */
#ifdef __LP64__
#define DEPTH(x)	(2 * (flsl((long)(x)) - 1))
#else /* !__LP64__ */
#define DEPTH(x)	(2 * (fls((int)(x)) - 1))
#endif /* __LP64__ */

#ifdef I_AM_PSORT_R
int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *));
#endif

/* Task entry point; declared here because _psort() dispatches to it. */
static void _psort_parallel(void *x);

211static void
212_psort(void *a, size_t n, size_t es,
213#ifdef I_AM_PSORT_R
214void *thunk,
215#else
216#define thunk	NULL
217#endif
218#ifdef I_AM_PSORT_B
219cmp_t ^cmp,
220#else
221cmp_t *cmp,
222#endif
223int depth_limit, struct shared *shared)
224{
225	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
226	size_t d, r;
227	int cmp_result;
228	int swaptype, swap_cnt;
229
230loop:
231	if (depth_limit-- <= 0) {
232#ifdef I_AM_PSORT_B
233		heapsort_b(a, n, es, cmp);
234#elif defined(I_AM_PSORT_R)
235		__heapsort_r(a, n, es, thunk, cmp);
236#else
237		heapsort(a, n, es, cmp);
238#endif
239		return;
240	}
241	SWAPINIT(a, es);
242	swap_cnt = 0;
243	if (n < 7) {
244		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
245			for (pl = pm;
246			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
247			     pl -= es)
248				swap(pl, pl - es);
249		return;
250	}
251	pm = (char *)a + (n / 2) * es;
252	if (n > 7) {
253		pl = a;
254		pn = (char *)a + (n - 1) * es;
255		if (n > 40) {
256			d = (n / 8) * es;
257			pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
258			pm = med3(pm - d, pm, pm + d, cmp, thunk);
259			pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
260		}
261		pm = med3(pl, pm, pn, cmp, thunk);
262	}
263	swap(a, pm);
264	pa = pb = (char *)a + es;
265
266	pc = pd = (char *)a + (n - 1) * es;
267	for (;;) {
268		while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
269			if (cmp_result == 0) {
270				swap_cnt = 1;
271				swap(pa, pb);
272				pa += es;
273			}
274			pb += es;
275		}
276		while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
277			if (cmp_result == 0) {
278				swap_cnt = 1;
279				swap(pc, pd);
280				pd -= es;
281			}
282			pc -= es;
283		}
284		if (pb > pc)
285			break;
286		swap(pb, pc);
287		swap_cnt = 1;
288		pb += es;
289		pc -= es;
290	}
291
292	pn = (char *)a + n * es;
293	r = min(pa - (char *)a, pb - pa);
294	vecswap(a, pb - r, r);
295	r = min(pd - pc, pn - pd - es);
296	vecswap(pb, pn - r, r);
297
298	if (swap_cnt == 0) {  /* Switch to insertion sort */
299		r = 1 + n / 4; /* n >= 7, so r >= 2 */
300		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
301			for (pl = pm;
302			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
303			     pl -= es) {
304				swap(pl, pl - es);
305				if (++swap_cnt > r) goto nevermind;
306			}
307		return;
308	}
309
310nevermind:
311	if ((r = pb - pa) > es) {
312		r /= es;
313		if (shared && r > shared->turnoff) {
314			union args *args = getargs(shared);
315
316			if (args == NULL)
317				LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
318			args->shared = shared;
319			args->a = a;
320			args->n = r;
321			args->depth_limit = depth_limit;
322			dispatch_group_async_f(shared->group, shared->queue, args,
323					_psort_parallel);
324		} else {
325#ifdef I_AM_PSORT_R
326			_psort(a, r, es, thunk, cmp, depth_limit, NULL);
327#else
328			_psort(a, r, es, cmp, depth_limit, NULL);
329#endif
330		}
331	}
332	if ((r = pd - pc) > es) {
333		/* Iterate rather than recurse to save stack space */
334		a = pn - r;
335		n = r / es;
336		goto loop;
337	}
338/*		psort(pn - r, r / es, es, cmp);*/
339}
340
341static void
342_psort_parallel(void *x)
343{
344	union args *args = (union args *)x;
345	struct shared *shared = args->shared;
346
347	_psort(args->a, args->n, shared->es,
348#ifdef I_AM_PSORT_R
349		shared->thunk,
350#endif
351		shared->cmp, args->depth_limit, shared);
352	returnargs(shared, args);
353}
354
/*
 * Fast, approximate integer square root: start from the power of two
 * 2^(flsl(x) / 2) and refine it with a single averaging step
 * (s + x / s) / 2.
 */
static size_t
isqrt(size_t x)
{
    size_t s = 1L << (flsl(x) / 2);
    return (s + x / s) / 2;
}

363void
364#ifdef I_AM_PSORT_R
365psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
366#elif defined(I_AM_PSORT_B)
367psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
368#else
369psort(void *a, size_t n, size_t es, cmp_t *cmp)
370#endif
371{
372	if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
373		struct shared shared;
374		union args *args;
375
376		bzero(&shared, sizeof(shared));
377		shared.sharedlock = OS_SPINLOCK_INIT;
378		if ((args = getargs(&shared)) != NULL) {
379			struct page *p, *pp;
380#ifdef I_AM_PSORT_R
381			shared.who = "psort_r";
382			shared.thunk = thunk;
383#elif defined(I_AM_PSORT_B)
384			shared.who = "psort_b";
385#else
386			shared.who = "psort";
387#endif
388			shared.cmp = cmp;
389			shared.es = es;
390			shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
391			shared.group = dispatch_group_create();
392			args->a = a;
393			args->n = n;
394			args->depth_limit = DEPTH(n);
395			args->shared = &shared;
396			/*
397			 * The turnoff value is the size of a partition that,
398			 * below which, we stop doing in parallel, and just do
399			 * in the current thread.  The value of sqrt(n) was
400			 * determined heuristically.  There is a smaller
401			 * dependence on the slowness of the comparison
402			 * function, and there might be a dependence on the
403			 * number of processors, but the algorithm has not been
404			 * determined.  Because the sensitivity to the turnoff
405			 * value is relatively low, we use a fast, approximate
406			 * integer square root routine that is good enough for
407			 * this purpose.
408			 */
409			shared.turnoff = isqrt(n);
410			_psort_parallel(args);
411
412			/* wait for queue to drain */
413			dispatch_group_wait(shared.group, DISPATCH_TIME_FOREVER);
414			dispatch_release(shared.group);
415			for(p = shared.pagelist; p; p = pp) {
416				pp = p->next;
417				munmap(p, PAGESIZE);
418			}
419			return;
420		}
421	}
422	/* Just call qsort */
423#ifdef I_AM_PSORT_R
424	qsort_r(a, n, es, thunk, cmp);
425#elif defined(I_AM_PSORT_B)
426	qsort_b(a, n, es, cmp);
427#else
428	qsort(a, n, es, cmp);
429#endif
430}
431