/* radixsort.c revision 1573 */
/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Peter McIlroy and by Dan Bernstein at New York University,
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
36
#if defined(LIBC_SCCS) && !defined(lint)
/*
 * SCCS version identification string; only compiled in when LIBC_SCCS is
 * defined and lint is not.
 */
static char sccsid[] = "@(#)radixsort.c	8.1 (Berkeley) 6/4/93";
#endif /* LIBC_SCCS and not lint */
40
/*
 * Radixsort routines.
 *
 * Program r_sort_a() is unstable but uses O(logN) extra memory for a stack.
 * Use radixsort(a, n, tab, endch) for this case.
 *
 * For stable sorting (using N extra pointers) use sradixsort(), which calls
 * r_sort_b().
 *
 * For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic,
 * "Engineering Radix Sort".
 */
53
#include <sys/types.h>

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
58
/*
 * One frame of the explicit work stack used by r_sort_a() and r_sort_b():
 * a sub-array of string pointers that still has to be sorted.
 */
typedef struct {
	const u_char **sa;	/* First element of the sub-array. */
	int sn, si;		/* Element count; byte index to sort on. */
} stack;
63
/*
 * Forward declarations of the file-local helpers.  Plain prototypes are
 * used (no __P(()) wrapper) so the file does not depend on <sys/cdefs.h>
 * providing that legacy macro.
 */
static inline void simplesort(const u_char **a, int n, int b,
	    const u_char *tr, u_int endch);
static void r_sort_a(const u_char **a, int n, int i, const u_char *tr,
	    u_int endch);
static void r_sort_b(const u_char **a, const u_char **ta, int n, int i,
	    const u_char *tr, u_int endch);
69
#define	THRESHOLD	20		/* Fewer elements: use simplesort(). */
#define	SIZE		512		/* Frames in each local work stack. */
72
/*
 * SETUP -- common prologue of radixsort() and sradixsort().
 *
 * Expects these names in the enclosing scope: tab, endch (the caller's
 * arguments), tr (const u_char *), tr0 (u_char[256]) and c (int).
 *
 * With tab == NULL a translation table is built in tr0 that maps the end
 * byte to 0, every byte below it to itself + 1 and every byte above it to
 * itself; endch is then rewritten to 0.  With a caller-supplied table, tr
 * points at it and endch becomes tab[endch], which must be 0 or 255.
 *
 * On invalid arguments the macro sets errno to EINVAL and makes the
 * ENCLOSING FUNCTION return -1.  An endch above 255 is rejected up front
 * because it would index past the end of the 256-entry tables.
 */
#define SETUP do {							\
	if (endch > 255) {						\
		errno = EINVAL;						\
		return (-1);						\
	}								\
	if (tab == NULL) {						\
		tr = tr0;						\
		for (c = 0; c < (int)endch; c++)			\
			tr0[c] = (u_char)(c + 1);			\
		tr0[c] = 0;						\
		for (c++; c < 256; c++)					\
			tr0[c] = (u_char)c;				\
		endch = 0;						\
	} else {							\
		endch = tab[endch];					\
		tr = tab;						\
		if (endch != 0 && endch != 255) {			\
			errno = EINVAL;					\
			return (-1);					\
		}							\
	}								\
} while (0)
91
92int
93radixsort(a, n, tab, endch)
94	const u_char **a, *tab;
95	int n;
96	u_int endch;
97{
98	const u_char *tr;
99	int c;
100	u_char tr0[256];
101
102	SETUP;
103	r_sort_a(a, n, 0, tr, endch);
104	return (0);
105}
106
107int
108sradixsort(a, n, tab, endch)
109	const u_char **a, *tab;
110	int n;
111	u_int endch;
112{
113	const u_char *tr, **ta;
114	int c;
115	u_char tr0[256];
116
117	SETUP;
118	if (n < THRESHOLD)
119		simplesort(a, n, 0, tr, endch);
120	else {
121		if ((ta = malloc(n * sizeof(a))) == NULL)
122			return (-1);
123		r_sort_b(a, ta, n, 0, tr, endch);
124		free(ta);
125	}
126	return (0);
127}
128
/*
 * Work-stack helpers.  empty(), pop() and push() operate on a variable
 * named sp in the enclosing scope: a pointer to the next free frame of a
 * stack whose frames have members sa, sn and si.
 *
 *	empty(s)	true when sp has not moved past the stack base s
 *	pop(a, n, i)	step sp back one frame and load that frame into a, n, i
 *	push(a, n, i)	store a, n, i into *sp and advance sp
 *	swap(a, b, t)	exchange a and b using t as scratch
 *
 * Arguments may be evaluated more than once (swap) and must be lvalues
 * where they are assigned to.
 */
#define empty(s)	((s) >= sp)
#define pop(a, n, i)	((a) = (--sp)->sa, (n) = sp->sn, (i) = sp->si)
#define push(a, n, i)	(sp->sa = (a), sp->sn = (n), (sp++)->si = (i))
#define swap(a, b, t)	((t) = (a), (a) = (b), (b) = (t))
133
134/* Unstable, in-place sort. */
135void
136r_sort_a(a, n, i, tr, endch)
137	const u_char **a;
138	int n, i;
139	const u_char *tr;
140	u_int endch;
141{
142	static int count[256], nc, bmin;
143	register int c;
144	register const u_char **ak, *r;
145	stack s[SIZE], *sp, *sp0, *sp1, temp;
146	int *cp, bigc;
147	const u_char **an, *t, **aj, **top[256];
148
149	/* Set up stack. */
150	sp = s;
151	push(a, n, i);
152	while (!empty(s)) {
153		pop(a, n, i);
154		if (n < THRESHOLD) {
155			simplesort(a, n, i, tr, endch);
156			continue;
157		}
158		an = a + n;
159
160		/* Make character histogram. */
161		if (nc == 0) {
162			bmin = 255;	/* First occupied bin, excluding eos. */
163			for (ak = a; ak < an;) {
164				c = tr[(*ak++)[i]];
165				if (++count[c] == 1 && c != endch) {
166					if (c < bmin)
167						bmin = c;
168					nc++;
169				}
170			}
171			if (sp + nc > s + SIZE) {	/* Get more stack. */
172				r_sort_a(a, n, i, tr, endch);
173				continue;
174			}
175		}
176
177		/*
178		 * Set top[]; push incompletely sorted bins onto stack.
179		 * top[] = pointers to last out-of-place element in bins.
180		 * count[] = counts of elements in bins.
181		 * Before permuting: top[c-1] + count[c] = top[c];
182		 * during deal: top[c] counts down to top[c-1].
183		 */
184		sp0 = sp1 = sp;		/* Stack position of biggest bin. */
185		bigc = 2;		/* Size of biggest bin. */
186		if (endch == 0)		/* Special case: set top[eos]. */
187			top[0] = ak = a + count[0];
188		else {
189			ak = a;
190			top[255] = an;
191		}
192		for (cp = count + bmin; nc > 0; cp++) {
193			while (*cp == 0)	/* Find next non-empty pile. */
194				cp++;
195			if (*cp > 1) {
196				if (*cp > bigc) {
197					bigc = *cp;
198					sp1 = sp;
199				}
200				push(ak, *cp, i+1);
201			}
202			top[cp-count] = ak += *cp;
203			nc--;
204		}
205		swap(*sp0, *sp1, temp);	/* Play it safe -- biggest bin last. */
206
207		/*
208		 * Permute misplacements home.  Already home: everything
209		 * before aj, and in bin[c], items from top[c] on.
210		 * Inner loop:
211		 *	r = next element to put in place;
212		 *	ak = top[r[i]] = location to put the next element.
213		 *	aj = bottom of 1st disordered bin.
214		 * Outer loop:
215		 *	Once the 1st disordered bin is done, ie. aj >= ak,
216		 *	aj<-aj + count[c] connects the bins in a linked list;
217		 *	reset count[c].
218		 */
219		for (aj = a; aj < an;  *aj = r, aj += count[c], count[c] = 0)
220			for (r = *aj;  aj < (ak = --top[c = tr[r[i]]]);)
221				swap(*ak, r, t);
222	}
223}
224
225/* Stable sort, requiring additional memory. */
226void
227r_sort_b(a, ta, n, i, tr, endch)
228	const u_char **a, **ta;
229	int n, i;
230	const u_char *tr;
231	u_int endch;
232{
233	static int count[256], nc, bmin;
234	register int c;
235	register const u_char **ak, **ai;
236	stack s[512], *sp, *sp0, *sp1, temp;
237	const u_char **top[256];
238	int *cp, bigc;
239
240	sp = s;
241	push(a, n, i);
242	while (!empty(s)) {
243		pop(a, n, i);
244		if (n < THRESHOLD) {
245			simplesort(a, n, i, tr, endch);
246			continue;
247		}
248
249		if (nc == 0) {
250			bmin = 255;
251			for (ak = a + n; --ak >= a;) {
252				c = tr[(*ak)[i]];
253				if (++count[c] == 1 && c != endch) {
254					if (c < bmin)
255						bmin = c;
256					nc++;
257				}
258			}
259			if (sp + nc > s + SIZE) {
260				r_sort_b(a, ta, n, i, tr, endch);
261				continue;
262			}
263		}
264
265		sp0 = sp1 = sp;
266		bigc = 2;
267		if (endch == 0) {
268			top[0] = ak = a + count[0];
269			count[0] = 0;
270		} else {
271			ak = a;
272			top[255] = a + n;
273			count[255] = 0;
274		}
275		for (cp = count + bmin; nc > 0; cp++) {
276			while (*cp == 0)
277				cp++;
278			if ((c = *cp) > 1) {
279				if (c > bigc) {
280					bigc = c;
281					sp1 = sp;
282				}
283				push(ak, c, i+1);
284			}
285			top[cp-count] = ak += c;
286			*cp = 0;			/* Reset count[]. */
287			nc--;
288		}
289		swap(*sp0, *sp1, temp);
290
291		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
292			*--ak = *--ai;
293		for (ak = ta+n; --ak >= ta;)		/* Deal to piles. */
294			*--top[tr[(*ak)[i]]] = *ak;
295	}
296}
297
/*
 * simplesort -- insertion sort for short arrays of byte strings.
 *
 *	a	array of n string pointers, reordered in place
 *	n	number of strings; fewer than two leaves a untouched
 *	b	index of the first byte to compare in every string
 *	tr	256-entry byte translation table; strings are ordered by
 *		their translated bytes
 *	endch	translated value that marks end of string
 *
 * Two strings are compared byte by byte from offset b until the newer one
 * reaches its terminator or the translated bytes differ; the element is
 * moved towards the front while its deciding byte is smaller than its
 * predecessor's.  Equal strings are never exchanged.
 */
static inline void
simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch)
{
	const u_char **ak, **ai, *s, *t, *tmp;
	u_char ch;

	/* Nothing to order; also avoids forming a + 1 on an empty array. */
	if (n < 2)
		return;

	for (ak = a + 1; --n >= 1; ak++) {
		for (ai = ak; ai > a; ai--) {
			s = ai[0] + b;
			t = ai[-1] + b;
			/* Skip the common prefix, stopping at s's terminator. */
			while ((ch = tr[*s]) != endch && ch == tr[*t]) {
				s++;
				t++;
			}
			if (ch >= tr[*t])	/* Already in order. */
				break;
			tmp = ai[0];
			ai[0] = ai[-1];
			ai[-1] = tmp;
		}
	}
}
319