1/****************************************************************************/ 2/*- 3 * Copyright (c) 1992, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 4. Neither the name of the University nor the names of its contributors 15 * may be used to endorse or promote products derived from this software 16 * without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31#if defined(LIBC_SCCS) && !defined(lint) 32static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93"; 33#endif /* LIBC_SCCS and not lint */ 34#include <sys/cdefs.h> 35__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $"); 36 37#include <stdlib.h> 38#include <pthread.h> 39#include <dispatch/dispatch.h> 40#include <stddef.h> 41#include <string.h> 42#include <libkern/OSAtomic.h> 43#include <sys/mman.h> 44#include <errno.h> 45#define __APPLE_API_PRIVATE 46#include <machine/cpu_capabilities.h> 47 48#ifdef I_AM_PSORT_R 49typedef int cmp_t(void *, const void *, const void *); 50#else 51typedef int cmp_t(const void *, const void *); 52#endif 53#ifdef I_AM_PSORT_B 54static inline char *med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline)); 55#else 56static inline char *med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline)); 57#endif 58static inline void swapfunc(char *, char *, int, int) __attribute__((always_inline)); 59 60#define min(a, b) (a) < (b) ? a : b 61 62#define NARGS ((PAGESIZE - offsetof(struct page, args)) / sizeof(union args)) 63#define PAGESIZE 4096 64#define PARALLEL_MIN_SIZE 2000 /* determine heuristically */ 65 66struct shared; /* forward reference */ 67union args { 68 union args *next; 69 struct { 70 struct shared *shared; 71 void *a; 72 size_t n; 73 int depth_limit; 74 } /* anonymous */; 75}; 76 77struct page { 78 struct page *next; 79 union args args[0]; 80}; 81 82struct shared { 83 char *who; 84 union args *freelist; 85 struct page *pagelist; 86#ifdef I_AM_PSORT_R 87 void *thunk; 88#endif 89#ifdef I_AM_PSORT_B 90 cmp_t ^cmp; 91#else 92 cmp_t *cmp; 93#endif 94 size_t es; 95 size_t turnoff; 96 dispatch_queue_t queue; 97 dispatch_group_t group; 98 OSSpinLock sharedlock; 99}; 100 101static union args * 102getargs(struct shared *shared) 103{ 104 union args *args; 105 106 OSSpinLockLock(&shared->sharedlock); 107 if(!shared->freelist) { 108 struct page *page; 109 union args *prev; 110 int i; 111 if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)) == NULL) 112 return NULL; 113 page->next = shared->pagelist; 114 shared->pagelist = page; 115 prev = NULL; 116 for(args = page->args, i = NARGS; i > 0; args++, i--) { 117 args->next = prev; 118 prev = args; 119 } 120 shared->freelist = prev; 121 } 122 args = shared->freelist; 123 shared->freelist = args->next; 124 OSSpinLockUnlock(&shared->sharedlock); 125 return args; 126} 127 128static void 129returnargs(struct shared *shared, union args *args) 130{ 131 OSSpinLockLock(&shared->sharedlock); 132 args->next = shared->freelist; 133 shared->freelist = args; 134 OSSpinLockUnlock(&shared->sharedlock); 135} 136 137/* 138 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". 139 */ 140#define swapcode(TYPE, parmi, parmj, n) { \ 141 long i = (n) / sizeof (TYPE); \ 142 TYPE *pi = (TYPE *) (parmi); \ 143 TYPE *pj = (TYPE *) (parmj); \ 144 do { \ 145 TYPE t = *pi; \ 146 *pi++ = *pj; \ 147 *pj++ = t; \ 148 } while (--i > 0); \ 149} 150 151#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \ 152 es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1; 153 154static inline void 155swapfunc(a, b, n, swaptype) 156 char *a, *b; 157 int n, swaptype; 158{ 159 if(swaptype <= 1) 160 swapcode(long, a, b, n) 161 else 162 swapcode(char, a, b, n) 163} 164 165#define swap(a, b) \ 166 if (swaptype == 0) { \ 167 long t = *(long *)(a); \ 168 *(long *)(a) = *(long *)(b); \ 169 *(long *)(b) = t; \ 170 } else \ 171 swapfunc(a, b, es, swaptype) 172 173#define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n, swaptype) 174 175#ifdef I_AM_PSORT_R 176#define CMP(t, x, y) (cmp((t), (x), (y))) 177#else 178#define CMP(t, x, y) (cmp((x), (y))) 179#endif 180 181static inline char * 182med3(char *a, char *b, char *c, 183#ifdef I_AM_PSORT_B 184cmp_t ^cmp, 185#else 186cmp_t *cmp, 187#endif 188void *thunk 189#ifndef I_AM_PSORT_R 190__unused 191#endif 192) 193{ 194 return CMP(thunk, a, b) < 0 ? 195 (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a )) 196 :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c )); 197} 198 199#ifdef __LP64__ 200#define DEPTH(x) (2 * (flsl((long)(x)) - 1)) 201#else /* !__LP64__ */ 202#define DEPTH(x) (2 * (fls((int)(x)) - 1)) 203#endif /* __LP64__ */ 204 205#ifdef I_AM_PSORT_R 206int __heapsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *)); 207#endif 208 209static void _psort_parallel(void *x); 210 211static void 212_psort(void *a, size_t n, size_t es, 213#ifdef I_AM_PSORT_R 214void *thunk, 215#else 216#define thunk NULL 217#endif 218#ifdef I_AM_PSORT_B 219cmp_t ^cmp, 220#else 221cmp_t *cmp, 222#endif 223int depth_limit, struct shared *shared) 224{ 225 char *pa, *pb, *pc, *pd, *pl, *pm, *pn; 226 size_t d, r; 227 int cmp_result; 228 int swaptype, swap_cnt; 229 230loop: 231 if (depth_limit-- <= 0) { 232#ifdef I_AM_PSORT_B 233 heapsort_b(a, n, es, cmp); 234#elif defined(I_AM_PSORT_R) 235 __heapsort_r(a, n, es, thunk, cmp); 236#else 237 heapsort(a, n, es, cmp); 238#endif 239 return; 240 } 241 SWAPINIT(a, es); 242 swap_cnt = 0; 243 if (n < 7) { 244 for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) 245 for (pl = pm; 246 pl > (char *)a && CMP(thunk, pl - es, pl) > 0; 247 pl -= es) 248 swap(pl, pl - es); 249 return; 250 } 251 pm = (char *)a + (n / 2) * es; 252 if (n > 7) { 253 pl = a; 254 pn = (char *)a + (n - 1) * es; 255 if (n > 40) { 256 d = (n / 8) * es; 257 pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk); 258 pm = med3(pm - d, pm, pm + d, cmp, thunk); 259 pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk); 260 } 261 pm = med3(pl, pm, pn, cmp, thunk); 262 } 263 swap(a, pm); 264 pa = pb = (char *)a + es; 265 266 pc = pd = (char *)a + (n - 1) * es; 267 for (;;) { 268 while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) { 269 if (cmp_result == 0) { 270 swap_cnt = 1; 271 swap(pa, pb); 272 pa += es; 273 } 274 pb += es; 275 } 276 while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) { 277 if (cmp_result == 0) { 278 swap_cnt = 1; 279 swap(pc, pd); 280 pd -= es; 281 } 282 pc -= es; 283 } 284 if (pb > pc) 285 break; 286 swap(pb, pc); 287 swap_cnt = 1; 288 pb += es; 289 pc -= es; 290 } 291 292 pn = (char *)a + n * es; 293 r = min(pa - (char *)a, pb - pa); 294 vecswap(a, pb - r, r); 295 r = min(pd - pc, pn - pd - es); 296 vecswap(pb, pn - r, r); 297 298 if (swap_cnt == 0) { /* Switch to insertion sort */ 299 r = 1 + n / 4; /* n >= 7, so r >= 2 */ 300 for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) 301 for (pl = pm; 302 pl > (char *)a && CMP(thunk, pl - es, pl) > 0; 303 pl -= es) { 304 swap(pl, pl - es); 305 if (++swap_cnt > r) goto nevermind; 306 } 307 return; 308 } 309 310nevermind: 311 if ((r = pb - pa) > es) { 312 r /= es; 313 if (shared && r > shared->turnoff) { 314 union args *args = getargs(shared); 315 316 if (args == NULL) 317 LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno)); 318 args->shared = shared; 319 args->a = a; 320 args->n = r; 321 args->depth_limit = depth_limit; 322 dispatch_group_async_f(shared->group, shared->queue, args, 323 _psort_parallel); 324 } else { 325#ifdef I_AM_PSORT_R 326 _psort(a, r, es, thunk, cmp, depth_limit, NULL); 327#else 328 _psort(a, r, es, cmp, depth_limit, NULL); 329#endif 330 } 331 } 332 if ((r = pd - pc) > es) { 333 /* Iterate rather than recurse to save stack space */ 334 a = pn - r; 335 n = r / es; 336 goto loop; 337 } 338/* psort(pn - r, r / es, es, cmp);*/ 339} 340 341static void 342_psort_parallel(void *x) 343{ 344 union args *args = (union args *)x; 345 struct shared *shared = args->shared; 346 347 _psort(args->a, args->n, shared->es, 348#ifdef I_AM_PSORT_R 349 shared->thunk, 350#endif 351 shared->cmp, args->depth_limit, shared); 352 returnargs(shared, args); 353} 354 355/* fast, approximate integer square root */ 356static size_t 357isqrt(size_t x) 358{ 359 size_t s = 1L << (flsl(x) / 2); 360 return (s + x / s) / 2; 361} 362 363void 364#ifdef I_AM_PSORT_R 365psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp) 366#elif defined(I_AM_PSORT_B) 367psort_b(void *a, size_t n, size_t es, cmp_t ^cmp) 368#else 369psort(void *a, size_t n, size_t es, cmp_t *cmp) 370#endif 371{ 372 if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) { 373 struct shared shared; 374 union args *args; 375 376 bzero(&shared, sizeof(shared)); 377 shared.sharedlock = OS_SPINLOCK_INIT; 378 if ((args = getargs(&shared)) != NULL) { 379 struct page *p, *pp; 380#ifdef I_AM_PSORT_R 381 shared.who = "psort_r"; 382 shared.thunk = thunk; 383#elif defined(I_AM_PSORT_B) 384 shared.who = "psort_b"; 385#else 386 shared.who = "psort"; 387#endif 388 shared.cmp = cmp; 389 shared.es = es; 390 shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); 391 shared.group = dispatch_group_create(); 392 args->a = a; 393 args->n = n; 394 args->depth_limit = DEPTH(n); 395 args->shared = &shared; 396 /* 397 * The turnoff value is the size of a partition that, 398 * below which, we stop doing in parallel, and just do 399 * in the current thread. The value of sqrt(n) was 400 * determined heuristically. There is a smaller 401 * dependence on the slowness of the comparison 402 * function, and there might be a dependence on the 403 * number of processors, but the algorithm has not been 404 * determined. Because the sensitivity to the turnoff 405 * value is relatively low, we use a fast, approximate 406 * integer square root routine that is good enough for 407 * this purpose. 408 */ 409 shared.turnoff = isqrt(n); 410 _psort_parallel(args); 411 412 /* wait for queue to drain */ 413 dispatch_group_wait(shared.group, DISPATCH_TIME_FOREVER); 414 dispatch_release(shared.group); 415 for(p = shared.pagelist; p; p = pp) { 416 pp = p->next; 417 munmap(p, PAGESIZE); 418 } 419 return; 420 } 421 } 422 /* Just call qsort */ 423#ifdef I_AM_PSORT_R 424 qsort_r(a, n, es, thunk, cmp); 425#elif defined(I_AM_PSORT_B) 426 qsort_b(a, n, es, cmp); 427#else 428 qsort(a, n, es, cmp); 429#endif 430} 431