qsort.c revision 1219:f89f56c2d9ac
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23/*
24 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
25 * Use is subject to license terms.
26 */
27
28#pragma ident	"%Z%%M%	%I%	%E% SMI"
29
30
31#if !defined(_KERNEL) && !defined(_KMDB)
32#include "synonyms.h"
33#endif /* !_KERNEL && !_KMDB */
34
35#include <sys/types.h>
36
37#if !defined(_KERNEL) && !defined(_KMDB)
38#include <stdlib.h>
39#include <synch.h>
40#endif /* !_KERNEL && !_KMDB */
41
42#include "qsort.h"
43
44static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt);
45static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt);
46static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt);
47static void swapb(char *r1, char *r2, size_t cnt);
48
49/*
50 * choose a median of 3 values
51 *
52 * note: cstyle specifically prohibits nested conditional operators
53 * but this is the only way to do the median of 3 function in-line
54 */
55#define	med3(a, b, c) (cmp((a), (b)) < 0) \
56	? ((cmp((b), (c)) < 0) ? (b) : (cmp((a), (c)) < 0) ? (c) : (a)) \
57	: ((cmp((b), (c)) > 0) ? (b) : (cmp((a), (c)) > 0) ? (c) : (a))
58
59#define	THRESH_L	5	/* threshold for insertion sort */
60#define	THRESH_M3	20	/* threshold for median of 3 */
61#define	THRESH_M9	50	/* threshold for median of 9 */
62
63typedef struct {
64	char	*b_lim;
65	size_t	nrec;
66} stk_t;
67
68/*
69 * qsort() is a general purpose, in-place sorting routine using a
70 * user provided call back function for comparisons.  This implementation
71 * utilizes a ternary quicksort algorithm, and cuts over to an
72 * insertion sort for partitions involving fewer than THRESH_L records.
73 *
74 * Potential User Errors
75 *   There is no return value from qsort, this function has no method
76 *   of alerting the user that a sort did not work or could not work.
77 *   We do not print an error message or exit the process or thread,
78 *   Even if we can detect an error, We CANNOT silently return without
79 *   sorting the data, if we did so the user could never be sure the
80 *   sort completed successfully.
81 *   It is possible we could change the return value of sort from void
82 *   to int and return success or some error codes, but this gets into
83 *   standards  and compatibility issues.
84 *
85 *   Examples of qsort parameter errors might be
86 *   1) record size (rsiz) equal to 0
87 *      qsort will loop and never return.
88 *   2) record size (rsiz) less than 0
89 *      rsiz is unsigned, so a negative value is insanely large
90 *   3) number of records (nrec) is 0
91 *      This is legal - qsort will return without examining any records
92 *   4) number of records (nrec) is less than 0
93 *      nrec is unsigned, so a negative value is insanely large.
94 *   5) nrec * rsiz > memory allocation for sort array
95 *      a segment violation may occur
96 *      corruption of other memory may occur
97 *   6) The base address of the sort array is invalid
98 *      a segment violation may occur
99 *      corruption of other memory may occur
100 *   7) The user call back function is invalid
101 *      we may get alignment errors or segment violations
102 *      we may jump into never-never land
103 *
104 *   Some less obvious errors might be
105 *   8) The user compare function is not comparing correctly
106 *   9) The user compare function modifies the data records
107 */
108
109void
110qsort(
111	void		*basep,
112	size_t		nrec,
113	size_t		rsiz,
114	int		(*cmp)(const void *, const void *))
115{
116	size_t		i;		/* temporary variable */
117
118	/* variables used by swap */
119	void		(*swapf)(char *, char *, size_t);
120	size_t		loops;
121
122	/* variables used by sort */
123	stk_t		stack[8 * sizeof (nrec) + 1];
124	stk_t		*sp;
125	char		*b_lim;		/* bottom limit */
126	char		*b_dup;		/* bottom duplicate */
127	char		*b_par;		/* bottom partition */
128	char		*t_lim;		/* top limit */
129	char		*t_dup;		/* top duplicate */
130	char		*t_par;		/* top partition */
131	char		*m1, *m2, *m3;	/* median pointers */
132	uintptr_t	d_bytelength;	/* byte length of duplicate records */
133	int		b_nrec;
134	int		t_nrec;
135	int		cv;		/* results of compare (bottom / top) */
136
137	/*
138	 * choose a swap function based on alignment and size
139	 *
140	 * The qsort function sorts an array of fixed length records.
141	 * We have very limited knowledge about the data record itself.
142	 * It may be that the data record is in the array we are sorting
143	 * or it may be that the array contains pointers or indexes to
144	 * the actual data record and all that we are sorting is the indexes.
145	 *
146	 * The following decision will choose an optimal swap function
147	 * based on the size and alignment of the data records
148	 *   swapp64	will swap 64 bit pointers
149	 *   swapp32	will swap 32 bit pointers
150	 *   swapi	will swap an array of 32 bit integers
151	 *   swapb	will swap an array of 8 bit characters
152	 *
153	 * swapi and swapb will also require the variable loops to be set
154	 * to control the length of the array being swapped
155	 */
156	if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
157	    (rsiz == sizeof (uint64_t))) {
158		loops = 1;
159		swapf = (void (*)(char *, char *, size_t))swapp64;
160	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
161	    (rsiz == sizeof (uint32_t))) {
162		loops = 1;
163		swapf = (void (*)(char *, char *, size_t))swapp32;
164	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
165	    ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
166		loops = rsiz / sizeof (int);
167		swapf = (void (*)(char *, char *, size_t))swapi;
168	} else {
169		loops = rsiz;
170		swapf = swapb;
171	}
172
173	/*
174	 * qsort is a partitioning sort
175	 *
176	 * the stack is the bookkeeping mechanism to keep track of all
177	 * the partitions.
178	 *
179	 * each sort pass takes one partition and sorts it into two partitions.
180	 * at the top of the loop we simply take the partition on the top
181	 * of the stack and sort it. See the comments at the bottom
182	 * of the loop regarding which partitions to add in what order.
183	 *
184	 * initially put the whole partition on the stack
185	 */
186	sp = stack;
187	sp->b_lim = (char *)basep;
188	sp->nrec = nrec;
189	sp++;
190	while (sp > stack) {
191		sp--;
192		b_lim = sp->b_lim;
193		nrec = sp->nrec;
194
195		/*
196		 * a linear insertion sort i faster than a qsort for
197		 * very small number of records (THRESH_L)
198		 *
199		 * if number records < threshold use linear insertion sort
200		 *
201		 * this also handles the special case where the partition
202		 * 0 or 1 records length.
203		 */
204		if (nrec < THRESH_L) {
205			/*
206			 * Linear insertion sort
207			 */
208			t_par = b_lim;
209			for (i = 1; i < nrec; i++) {
210				t_par += rsiz;
211				b_par = t_par;
212				while (b_par > b_lim) {
213					b_par -= rsiz;
214					if ((*cmp)(b_par, b_par + rsiz) <= 0) {
215						break;
216					}
217					(*swapf)(b_par, b_par + rsiz, loops);
218				}
219			}
220
221			/*
222			 * a linear insertion sort will put all records
223			 * in their final position and will not create
224			 * subpartitions.
225			 *
226			 * therefore when the insertion sort is complete
227			 * just go to the top of the loop and get the
228			 * next partition to sort.
229			 */
230			continue;
231		}
232
233		/* quicksort */
234
235		/*
236		 * choose a pivot record
237		 *
238		 * Ideally the pivot record will divide the partition
239		 * into two equal parts. however we have to balance the
240		 * work involved in selecting the pivot record with the
241		 * expected benefit.
242		 *
243		 * The choice of pivot record depends on the number of
244		 * records in the partition
245		 *
246		 * for small partitions (nrec < THRESH_M3)
247		 *   we just select the record in the middle of the partition
248		 *
249		 * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
250		 *   we select three values and choose the median of 3
251		 *
252		 * if (nrec >= THRESH_M9)
253		 *   then we use an approximate median of 9
254		 *   9 records are selected and grouped in 3 groups of 3
255		 *   the median of each of these 3 groups is fed into another
256		 *   median of 3 decision.
257		 *
258		 * Each median of 3 decision is 2 or 3 compares,
259		 * so median of 9 costs between 8 and 12 compares.
260		 *
261		 * i is byte distance between two consecutive samples
262		 * m2 will point to the pivot record
263		 */
264		if (nrec < THRESH_M3) {
265			m2 = b_lim + (nrec / 2) * rsiz;
266		} else if (nrec < THRESH_M9) {
267			/* use median of 3 */
268			i = ((nrec - 1) / 2) * rsiz;
269			m2 = med3(b_lim, b_lim + i, b_lim + 2 * i);
270		} else {
271			/* approx median of 9 */
272			i = ((nrec - 1) / 8) * rsiz;
273			m1 = med3(b_lim, b_lim +  i, b_lim + 2 * i);
274			m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i);
275			m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i);
276			m2 = med3(m1, m2, m3);
277		}
278
279		/*
280		 * quick sort partitioning
281		 *
282		 * The partition limits are defined by bottom and top pointers
283		 * b_lim and t_lim.
284		 *
285		 * qsort uses a fairly standard method of moving the
286		 * partitioning pointers, b_par and t_par, to the middle of
287		 * the partition and exchanging records that are in the
288		 * wrong part of the partition.
289		 *
290		 * Two enhancements have been made to the basic algorithm.
291		 * One for handling duplicate records and one to minimize
292		 * the number of swaps.
293		 *
294		 * Two duplicate records pointers are (b_dup and t_dup) are
295		 * initially set to b_lim and t_lim.  Each time a record
296		 * whose sort key value is equal to the pivot record is found
297		 * it will be swapped with the record pointed to by
298		 * b_dup or t_dup and the duplicate pointer will be
299		 * incremented toward the center.
300		 * When partitioning is complete, all the duplicate records
301		 * will have been collected at the upper and lower limits of
302		 * the partition and can easily be moved adjacent to the
303		 * pivot record.
304		 *
305		 * The second optimization is to minimize the number of swaps.
306		 * The pointer m2 points to the pivot record.
307		 * During partitioning, if m2 is ever equal to the partitioning
308		 * pointers, b_par or t_par, then b_par or t_par just moves
309		 * onto the next record without doing a compare.
310		 * If as a result of duplicate record detection,
311		 * b_dup or t_dup is ever equal to m2,
312		 * then m2 is changed to point to the duplicate record and
313		 * b_dup or t_dup is incremented with out swapping records.
314		 *
315		 * When partitioning is done, we may not have the same pivot
316		 * record that we started with, but we will have one with
317		 * an equal sort key.
318		 */
319		b_dup = b_par		= b_lim;
320		t_dup = t_par = t_lim	= b_lim + rsiz * (nrec - 1);
321		for (;;) {
322
323			/* move bottom pointer up */
324			for (; b_par <= t_par; b_par += rsiz) {
325				if (b_par == m2) {
326					continue;
327				}
328				cv = cmp(b_par, m2);
329				if (cv > 0) {
330					break;
331				}
332				if (cv == 0) {
333					if (b_dup == m2) {
334						m2 = b_par;
335					} else if (b_dup != b_par) {
336						(*swapf)(b_dup, b_par, loops);
337					}
338					b_dup += rsiz;
339				}
340			}
341
342			/* move top pointer down */
343			for (; b_par < t_par; t_par -= rsiz) {
344				if (t_par == m2) {
345					continue;
346				}
347				cv = cmp(t_par, m2);
348				if (cv < 0) {
349					break;
350				}
351				if (cv == 0) {
352					if (t_dup == m2) {
353						m2 = t_par;
354					} else if (t_dup != t_par) {
355						(*swapf)(t_dup, t_par, loops);
356					}
357					t_dup -= rsiz;
358				}
359			}
360
361			/* break if we are done partitioning */
362			if (b_par >= t_par) {
363				break;
364			}
365
366			/* exchange records at upper and lower break points */
367			(*swapf)(b_par, t_par, loops);
368			b_par += rsiz;
369			t_par -= rsiz;
370		}
371
372		/*
373		 * partitioning is now complete
374		 *
375		 * there are two termination conditions from the partitioning
376		 * loop above.  Either b_par or t_par have crossed or
377		 * they are equal.
378		 *
379		 * we need to swap the pivot record to its final position
380		 * m2 could be in either the upper or lower partitions
381		 * or it could already be in its final position
382		 */
383		/*
384		 * R[b_par] > R[m2]
385		 * R[t_par] < R[m2]
386		 */
387		if (t_par < b_par) {
388			if (m2 < t_par) {
389				(*swapf)(m2, t_par, loops);
390				m2 = b_par = t_par;
391			} else if (m2 > b_par) {
392				(*swapf)(m2, b_par, loops);
393				m2 = t_par = b_par;
394			} else {
395				b_par = t_par = m2;
396			}
397		} else {
398			if (m2 < t_par) {
399				t_par = b_par = t_par - rsiz;
400			}
401			if (m2 != b_par) {
402				(*swapf)(m2, b_par, loops);
403			}
404			m2 = t_par;
405		}
406
407		/*
408		 * move bottom duplicates next to pivot
409		 * optimized to eliminate overlap
410		 */
411		d_bytelength = b_dup - b_lim;
412		if (b_par - b_dup < d_bytelength) {
413			b_dup = b_lim + (b_par - b_dup);
414		}
415		while (b_dup > b_lim) {
416			b_dup -= rsiz;
417			b_par -= rsiz;
418			(*swapf)(b_dup, b_par, loops);
419		}
420		b_par = m2 - d_bytelength;
421
422		/*
423		 * move top duplicates next to pivot
424		 */
425		d_bytelength = t_lim - t_dup;
426		if (t_dup - t_par < d_bytelength) {
427			t_dup = t_lim - (t_dup - t_par);
428		}
429		while (t_dup < t_lim) {
430			t_dup += rsiz;
431			t_par += rsiz;
432			(*swapf)(t_dup, t_par, loops);
433		}
434		t_par = m2 + d_bytelength;
435
436		/*
437		 * when a qsort pass completes there are three partitions
438		 * 1) the lower contains all records less than pivot
439		 * 2) the upper contains all records greater than pivot
440		 * 3) the pivot partition contains all record equal to pivot
441		 *
442		 * all records in the pivot partition are in their final
443		 * position and do not need to be accounted for by the stack
444		 *
445		 * when adding partitions to the stack
446		 * it is important to add the largest partition first
447		 * to prevent stack overflow.
448		 *
449		 * calculate number of unsorted records in top and bottom
450		 * push resulting partitions on stack
451		 */
452		b_nrec = (b_par - b_lim) / rsiz;
453		t_nrec = (t_lim - t_par) / rsiz;
454		if (b_nrec < t_nrec) {
455			sp->b_lim = t_par + rsiz;
456			sp->nrec = t_nrec;
457			sp++;
458			sp->b_lim = b_lim;
459			sp->nrec = b_nrec;
460			sp++;
461		} else {
462			sp->b_lim = b_lim;
463			sp->nrec = b_nrec;
464			sp++;
465			sp->b_lim = t_par + rsiz;
466			sp->nrec = t_nrec;
467			sp++;
468		}
469	}
470}
471
472/*
473 * The following swap functions should not create a stack frame
474 * the SPARC call / return instruction will be executed
475 * but the a save / restore will not be executed
476 * which means we won't do a window turn with the spill / fill overhead
477 * verify this by examining the assembly code
478 */
479
480/* ARGSUSED */
481static void
482swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
483{
484	uint32_t temp;
485
486	temp = *r1;
487	*r1++ = *r2;
488	*r2++ = temp;
489}
490
491/* ARGSUSED */
492static void
493swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
494{
495	uint64_t temp;
496
497	temp = *r1;
498	*r1++ = *r2;
499	*r2++ = temp;
500}
501
502static void
503swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
504{
505	uint32_t temp;
506
507	/* character by character */
508	while (cnt--) {
509		temp = *r1;
510		*r1++ = *r2;
511		*r2++ = temp;
512	}
513}
514
515static void
516swapb(char *r1, char *r2, size_t cnt)
517{
518	char	temp;
519
520	/* character by character */
521	while (cnt--) {
522		temp = *r1;
523		*r1++ = *r2;
524		*r2++ = temp;
525	}
526}
527