1/* $NetBSD: radix_sort.c,v 1.3 2009/09/10 22:02:40 dsl Exp $ */ 2 3/*- 4 * Copyright (c) 1990, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy and by Dan Bernstein at New York University, 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35#include <sys/cdefs.h> 36#if defined(LIBC_SCCS) && !defined(lint) 37#if 0 38static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95"; 39#else 40__RCSID("$NetBSD: radix_sort.c,v 1.3 2009/09/10 22:02:40 dsl Exp $"); 41#endif 42#endif /* LIBC_SCCS and not lint */ 43 44/* 45 * 'stable' radix sort initially from libc/stdlib/radixsort.c 46 */ 47 48#include <sys/types.h> 49 50#include <assert.h> 51#include <errno.h> 52#include <util.h> 53#include "sort.h" 54 55typedef struct { 56 RECHEADER **sa; /* Base of saved area */ 57 int sn; /* Number of entries */ 58 int si; /* index into data for compare */ 59} stack; 60 61static void simplesort(RECHEADER **, int, int); 62 63#define THRESHOLD 20 /* Divert to simplesort(). */ 64 65#define empty(s) (s >= sp) 66#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si 67#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i 68#define swap(a, b, t) t = a, a = b, b = t 69 70void 71radix_sort(RECHEADER **a, RECHEADER **ta, int n) 72{ 73 u_int count[256], nc, bmin; 74 u_int c; 75 RECHEADER **ak, **tai, **lim; 76 RECHEADER *hdr; 77 int stack_size = 512; 78 stack *s, *sp, *sp0, *sp1, temp; 79 RECHEADER **top[256]; 80 u_int *cp, bigc; 81 int data_index = 0; 82 83 if (n < THRESHOLD && !DEBUG('r')) { 84 simplesort(a, n, 0); 85 return; 86 } 87 88 s = emalloc(stack_size * sizeof *s); 89 memset(&count, 0, sizeof count); 90 /* Technically 'top' doesn't need zeroing */ 91 memset(&top, 0, sizeof top); 92 93 sp = s; 94 push(a, n, data_index); 95 while (!empty(s)) { 96 pop(a, n, data_index); 97 if (n < THRESHOLD && !DEBUG('r')) { 98 simplesort(a, n, data_index); 99 continue; 100 } 101 102 /* Count number of times each 'next byte' occurs */ 103 nc = 0; 104 bmin = 255; 105 lim = a + n; 106 for (ak = a, tai = ta; ak < lim; ak++) { 107 hdr = *ak; 108 if (data_index >= hdr->keylen) { 109 /* Short key, copy to start of output */ 110 if (UNIQUE && a != sp->sa) 111 /* Stop duplicate being written out */ 112 hdr->keylen = -1; 113 *a++ = hdr; 114 n--; 115 continue; 116 } 117 /* Save in temp buffer for distribute */ 118 *tai++ = hdr; 119 c = hdr->data[data_index]; 120 if (++count[c] == 1) { 121 if (c < bmin) 122 bmin = c; 123 nc++; 124 } 125 } 126 /* 127 * We need save the bounds for each 'next byte' that 128 * occurs more so we can sort each block. 129 */ 130 if (sp + nc > s + stack_size) { 131 stack_size *= 2; 132 sp1 = erealloc(s, stack_size * sizeof *s); 133 sp = sp1 + (sp - s); 134 s = sp1; 135 } 136 137 /* Minor optimisation to do the largest set last */ 138 sp0 = sp1 = sp; 139 bigc = 2; 140 /* Convert 'counts' positions, saving bounds for later sorts */ 141 ak = a; 142 for (cp = count + bmin; nc > 0; cp++) { 143 while (*cp == 0) 144 cp++; 145 if ((c = *cp) > 1) { 146 if (c > bigc) { 147 bigc = c; 148 sp1 = sp; 149 } 150 push(ak, c, data_index+1); 151 } 152 ak += c; 153 top[cp-count] = ak; 154 *cp = 0; /* Reset count[]. */ 155 nc--; 156 } 157 swap(*sp0, *sp1, temp); 158 159 for (ak = ta+n; --ak >= ta;) /* Deal to piles. */ 160 *--top[(*ak)->data[data_index]] = *ak; 161 } 162 163 free(s); 164} 165 166/* insertion sort, short records are sorted before long ones */ 167static void 168simplesort(RECHEADER **a, int n, int data_index) 169{ 170 RECHEADER **ak, **ai; 171 RECHEADER *akh; 172 RECHEADER **lim = a + n; 173 const u_char *s, *t; 174 int s_len, t_len; 175 int i; 176 int r; 177 178 if (n <= 1) 179 return; 180 181 for (ak = a+1; ak < lim; ak++) { 182 akh = *ak; 183 s = akh->data; 184 s_len = akh->keylen; 185 for (ai = ak; ;) { 186 ai--; 187 t_len = (*ai)->keylen; 188 if (t_len != -1) { 189 t = (*ai)->data; 190 for (i = data_index; ; i++) { 191 if (i >= s_len || i >= t_len) { 192 r = s_len - t_len; 193 break; 194 } 195 r = s[i] - t[i]; 196 if (r != 0) 197 break; 198 } 199 if (r >= 0) { 200 if (r == 0 && UNIQUE) { 201 /* Put record below existing */ 202 ai[1] = ai[0]; 203 /* Mark as duplicate - ignore */ 204 akh->keylen = -1; 205 } else { 206 ai++; 207 } 208 break; 209 } 210 } 211 ai[1] = ai[0]; 212 if (ai == a) 213 break; 214 } 215 ai[0] = akh; 216 } 217} 218