1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#if !defined(_KERNEL) && !defined(_KMDB)
30#include "lint.h"
31#endif /* !_KERNEL && !_KMDB */
32
33#include <sys/types.h>
34
35#if !defined(_KERNEL) && !defined(_KMDB)
36#include <stdlib.h>
37#include <synch.h>
38#endif /* !_KERNEL && !_KMDB */
39
40#include "qsort.h"
41
42static void swapp32(uint32_t *r1, uint32_t *r2, size_t cnt);
43static void swapp64(uint64_t *r1, uint64_t *r2, size_t cnt);
44static void swapi(uint32_t *r1, uint32_t *r2, size_t cnt);
45static void swapb(char *r1, char *r2, size_t cnt);
46
47/*
48 * choose a median of 3 values
49 *
50 * note: cstyle specifically prohibits nested conditional operators
51 * but this is the only way to do the median of 3 function in-line
52 */
53#define	med3(a, b, c) (cmp((a), (b)) < 0) \
54	? ((cmp((b), (c)) < 0) ? (b) : (cmp((a), (c)) < 0) ? (c) : (a)) \
55	: ((cmp((b), (c)) > 0) ? (b) : (cmp((a), (c)) > 0) ? (c) : (a))
56
57#define	THRESH_L	5	/* threshold for insertion sort */
58#define	THRESH_M3	20	/* threshold for median of 3 */
59#define	THRESH_M9	50	/* threshold for median of 9 */
60
61typedef struct {
62	char	*b_lim;
63	size_t	nrec;
64} stk_t;
65
66/*
67 * qsort() is a general purpose, in-place sorting routine using a
68 * user provided call back function for comparisons.  This implementation
69 * utilizes a ternary quicksort algorithm, and cuts over to an
70 * insertion sort for partitions involving fewer than THRESH_L records.
71 *
72 * Potential User Errors
73 *   There is no return value from qsort, this function has no method
74 *   of alerting the user that a sort did not work or could not work.
75 *   We do not print an error message or exit the process or thread,
76 *   Even if we can detect an error, We CANNOT silently return without
77 *   sorting the data, if we did so the user could never be sure the
78 *   sort completed successfully.
79 *   It is possible we could change the return value of sort from void
80 *   to int and return success or some error codes, but this gets into
81 *   standards  and compatibility issues.
82 *
83 *   Examples of qsort parameter errors might be
84 *   1) record size (rsiz) equal to 0
85 *      qsort will loop and never return.
86 *   2) record size (rsiz) less than 0
87 *      rsiz is unsigned, so a negative value is insanely large
88 *   3) number of records (nrec) is 0
89 *      This is legal - qsort will return without examining any records
90 *   4) number of records (nrec) is less than 0
91 *      nrec is unsigned, so a negative value is insanely large.
92 *   5) nrec * rsiz > memory allocation for sort array
93 *      a segment violation may occur
94 *      corruption of other memory may occur
95 *   6) The base address of the sort array is invalid
96 *      a segment violation may occur
97 *      corruption of other memory may occur
98 *   7) The user call back function is invalid
99 *      we may get alignment errors or segment violations
100 *      we may jump into never-never land
101 *
102 *   Some less obvious errors might be
103 *   8) The user compare function is not comparing correctly
104 *   9) The user compare function modifies the data records
105 */
106
107void
108qsort(
109	void		*basep,
110	size_t		nrec,
111	size_t		rsiz,
112	int		(*cmp)(const void *, const void *))
113{
114	size_t		i;		/* temporary variable */
115
116	/* variables used by swap */
117	void		(*swapf)(char *, char *, size_t);
118	size_t		loops;
119
120	/* variables used by sort */
121	stk_t		stack[8 * sizeof (nrec) + 1];
122	stk_t		*sp;
123	char		*b_lim;		/* bottom limit */
124	char		*b_dup;		/* bottom duplicate */
125	char		*b_par;		/* bottom partition */
126	char		*t_lim;		/* top limit */
127	char		*t_dup;		/* top duplicate */
128	char		*t_par;		/* top partition */
129	char		*m1, *m2, *m3;	/* median pointers */
130	uintptr_t	d_bytelength;	/* byte length of duplicate records */
131	int		b_nrec;
132	int		t_nrec;
133	int		cv;		/* results of compare (bottom / top) */
134
135	/*
136	 * choose a swap function based on alignment and size
137	 *
138	 * The qsort function sorts an array of fixed length records.
139	 * We have very limited knowledge about the data record itself.
140	 * It may be that the data record is in the array we are sorting
141	 * or it may be that the array contains pointers or indexes to
142	 * the actual data record and all that we are sorting is the indexes.
143	 *
144	 * The following decision will choose an optimal swap function
145	 * based on the size and alignment of the data records
146	 *   swapp64	will swap 64 bit pointers
147	 *   swapp32	will swap 32 bit pointers
148	 *   swapi	will swap an array of 32 bit integers
149	 *   swapb	will swap an array of 8 bit characters
150	 *
151	 * swapi and swapb will also require the variable loops to be set
152	 * to control the length of the array being swapped
153	 */
154	if ((((uintptr_t)basep & (sizeof (uint64_t) - 1)) == 0) &&
155	    (rsiz == sizeof (uint64_t))) {
156		loops = 1;
157		swapf = (void (*)(char *, char *, size_t))swapp64;
158	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
159	    (rsiz == sizeof (uint32_t))) {
160		loops = 1;
161		swapf = (void (*)(char *, char *, size_t))swapp32;
162	} else if ((((uintptr_t)basep & (sizeof (uint32_t) - 1)) == 0) &&
163	    ((rsiz & (sizeof (uint32_t) - 1)) == 0)) {
164		loops = rsiz / sizeof (int);
165		swapf = (void (*)(char *, char *, size_t))swapi;
166	} else {
167		loops = rsiz;
168		swapf = swapb;
169	}
170
171	/*
172	 * qsort is a partitioning sort
173	 *
174	 * the stack is the bookkeeping mechanism to keep track of all
175	 * the partitions.
176	 *
177	 * each sort pass takes one partition and sorts it into two partitions.
178	 * at the top of the loop we simply take the partition on the top
179	 * of the stack and sort it. See the comments at the bottom
180	 * of the loop regarding which partitions to add in what order.
181	 *
182	 * initially put the whole partition on the stack
183	 */
184	sp = stack;
185	sp->b_lim = (char *)basep;
186	sp->nrec = nrec;
187	sp++;
188	while (sp > stack) {
189		sp--;
190		b_lim = sp->b_lim;
191		nrec = sp->nrec;
192
193		/*
194		 * a linear insertion sort i faster than a qsort for
195		 * very small number of records (THRESH_L)
196		 *
197		 * if number records < threshold use linear insertion sort
198		 *
199		 * this also handles the special case where the partition
200		 * 0 or 1 records length.
201		 */
202		if (nrec < THRESH_L) {
203			/*
204			 * Linear insertion sort
205			 */
206			t_par = b_lim;
207			for (i = 1; i < nrec; i++) {
208				t_par += rsiz;
209				b_par = t_par;
210				while (b_par > b_lim) {
211					b_par -= rsiz;
212					if ((*cmp)(b_par, b_par + rsiz) <= 0) {
213						break;
214					}
215					(*swapf)(b_par, b_par + rsiz, loops);
216				}
217			}
218
219			/*
220			 * a linear insertion sort will put all records
221			 * in their final position and will not create
222			 * subpartitions.
223			 *
224			 * therefore when the insertion sort is complete
225			 * just go to the top of the loop and get the
226			 * next partition to sort.
227			 */
228			continue;
229		}
230
231		/* quicksort */
232
233		/*
234		 * choose a pivot record
235		 *
236		 * Ideally the pivot record will divide the partition
237		 * into two equal parts. however we have to balance the
238		 * work involved in selecting the pivot record with the
239		 * expected benefit.
240		 *
241		 * The choice of pivot record depends on the number of
242		 * records in the partition
243		 *
244		 * for small partitions (nrec < THRESH_M3)
245		 *   we just select the record in the middle of the partition
246		 *
247		 * if (nrec >= THRESH_M3 && nrec < THRESH_M9)
248		 *   we select three values and choose the median of 3
249		 *
250		 * if (nrec >= THRESH_M9)
251		 *   then we use an approximate median of 9
252		 *   9 records are selected and grouped in 3 groups of 3
253		 *   the median of each of these 3 groups is fed into another
254		 *   median of 3 decision.
255		 *
256		 * Each median of 3 decision is 2 or 3 compares,
257		 * so median of 9 costs between 8 and 12 compares.
258		 *
259		 * i is byte distance between two consecutive samples
260		 * m2 will point to the pivot record
261		 */
262		if (nrec < THRESH_M3) {
263			m2 = b_lim + (nrec / 2) * rsiz;
264		} else if (nrec < THRESH_M9) {
265			/* use median of 3 */
266			i = ((nrec - 1) / 2) * rsiz;
267			m2 = med3(b_lim, b_lim + i, b_lim + 2 * i);
268		} else {
269			/* approx median of 9 */
270			i = ((nrec - 1) / 8) * rsiz;
271			m1 = med3(b_lim, b_lim +  i, b_lim + 2 * i);
272			m2 = med3(b_lim + 3 * i, b_lim + 4 * i, b_lim + 5 * i);
273			m3 = med3(b_lim + 6 * i, b_lim + 7 * i, b_lim + 8 * i);
274			m2 = med3(m1, m2, m3);
275		}
276
277		/*
278		 * quick sort partitioning
279		 *
280		 * The partition limits are defined by bottom and top pointers
281		 * b_lim and t_lim.
282		 *
283		 * qsort uses a fairly standard method of moving the
284		 * partitioning pointers, b_par and t_par, to the middle of
285		 * the partition and exchanging records that are in the
286		 * wrong part of the partition.
287		 *
288		 * Two enhancements have been made to the basic algorithm.
289		 * One for handling duplicate records and one to minimize
290		 * the number of swaps.
291		 *
292		 * Two duplicate records pointers are (b_dup and t_dup) are
293		 * initially set to b_lim and t_lim.  Each time a record
294		 * whose sort key value is equal to the pivot record is found
295		 * it will be swapped with the record pointed to by
296		 * b_dup or t_dup and the duplicate pointer will be
297		 * incremented toward the center.
298		 * When partitioning is complete, all the duplicate records
299		 * will have been collected at the upper and lower limits of
300		 * the partition and can easily be moved adjacent to the
301		 * pivot record.
302		 *
303		 * The second optimization is to minimize the number of swaps.
304		 * The pointer m2 points to the pivot record.
305		 * During partitioning, if m2 is ever equal to the partitioning
306		 * pointers, b_par or t_par, then b_par or t_par just moves
307		 * onto the next record without doing a compare.
308		 * If as a result of duplicate record detection,
309		 * b_dup or t_dup is ever equal to m2,
310		 * then m2 is changed to point to the duplicate record and
311		 * b_dup or t_dup is incremented with out swapping records.
312		 *
313		 * When partitioning is done, we may not have the same pivot
314		 * record that we started with, but we will have one with
315		 * an equal sort key.
316		 */
317		b_dup = b_par		= b_lim;
318		t_dup = t_par = t_lim	= b_lim + rsiz * (nrec - 1);
319		for (;;) {
320
321			/* move bottom pointer up */
322			for (; b_par <= t_par; b_par += rsiz) {
323				if (b_par == m2) {
324					continue;
325				}
326				cv = cmp(b_par, m2);
327				if (cv > 0) {
328					break;
329				}
330				if (cv == 0) {
331					if (b_dup == m2) {
332						m2 = b_par;
333					} else if (b_dup != b_par) {
334						(*swapf)(b_dup, b_par, loops);
335					}
336					b_dup += rsiz;
337				}
338			}
339
340			/* move top pointer down */
341			for (; b_par < t_par; t_par -= rsiz) {
342				if (t_par == m2) {
343					continue;
344				}
345				cv = cmp(t_par, m2);
346				if (cv < 0) {
347					break;
348				}
349				if (cv == 0) {
350					if (t_dup == m2) {
351						m2 = t_par;
352					} else if (t_dup != t_par) {
353						(*swapf)(t_dup, t_par, loops);
354					}
355					t_dup -= rsiz;
356				}
357			}
358
359			/* break if we are done partitioning */
360			if (b_par >= t_par) {
361				break;
362			}
363
364			/* exchange records at upper and lower break points */
365			(*swapf)(b_par, t_par, loops);
366			b_par += rsiz;
367			t_par -= rsiz;
368		}
369
370		/*
371		 * partitioning is now complete
372		 *
373		 * there are two termination conditions from the partitioning
374		 * loop above.  Either b_par or t_par have crossed or
375		 * they are equal.
376		 *
377		 * we need to swap the pivot record to its final position
378		 * m2 could be in either the upper or lower partitions
379		 * or it could already be in its final position
380		 */
381		/*
382		 * R[b_par] > R[m2]
383		 * R[t_par] < R[m2]
384		 */
385		if (t_par < b_par) {
386			if (m2 < t_par) {
387				(*swapf)(m2, t_par, loops);
388				m2 = b_par = t_par;
389			} else if (m2 > b_par) {
390				(*swapf)(m2, b_par, loops);
391				m2 = t_par = b_par;
392			} else {
393				b_par = t_par = m2;
394			}
395		} else {
396			if (m2 < t_par) {
397				t_par = b_par = t_par - rsiz;
398			}
399			if (m2 != b_par) {
400				(*swapf)(m2, b_par, loops);
401			}
402			m2 = t_par;
403		}
404
405		/*
406		 * move bottom duplicates next to pivot
407		 * optimized to eliminate overlap
408		 */
409		d_bytelength = b_dup - b_lim;
410		if (b_par - b_dup < d_bytelength) {
411			b_dup = b_lim + (b_par - b_dup);
412		}
413		while (b_dup > b_lim) {
414			b_dup -= rsiz;
415			b_par -= rsiz;
416			(*swapf)(b_dup, b_par, loops);
417		}
418		b_par = m2 - d_bytelength;
419
420		/*
421		 * move top duplicates next to pivot
422		 */
423		d_bytelength = t_lim - t_dup;
424		if (t_dup - t_par < d_bytelength) {
425			t_dup = t_lim - (t_dup - t_par);
426		}
427		while (t_dup < t_lim) {
428			t_dup += rsiz;
429			t_par += rsiz;
430			(*swapf)(t_dup, t_par, loops);
431		}
432		t_par = m2 + d_bytelength;
433
434		/*
435		 * when a qsort pass completes there are three partitions
436		 * 1) the lower contains all records less than pivot
437		 * 2) the upper contains all records greater than pivot
438		 * 3) the pivot partition contains all record equal to pivot
439		 *
440		 * all records in the pivot partition are in their final
441		 * position and do not need to be accounted for by the stack
442		 *
443		 * when adding partitions to the stack
444		 * it is important to add the largest partition first
445		 * to prevent stack overflow.
446		 *
447		 * calculate number of unsorted records in top and bottom
448		 * push resulting partitions on stack
449		 */
450		b_nrec = (b_par - b_lim) / rsiz;
451		t_nrec = (t_lim - t_par) / rsiz;
452		if (b_nrec < t_nrec) {
453			sp->b_lim = t_par + rsiz;
454			sp->nrec = t_nrec;
455			sp++;
456			sp->b_lim = b_lim;
457			sp->nrec = b_nrec;
458			sp++;
459		} else {
460			sp->b_lim = b_lim;
461			sp->nrec = b_nrec;
462			sp++;
463			sp->b_lim = t_par + rsiz;
464			sp->nrec = t_nrec;
465			sp++;
466		}
467	}
468}
469
470/*
471 * The following swap functions should not create a stack frame
472 * the SPARC call / return instruction will be executed
473 * but the a save / restore will not be executed
474 * which means we won't do a window turn with the spill / fill overhead
475 * verify this by examining the assembly code
476 */
477
478/* ARGSUSED */
479static void
480swapp32(uint32_t *r1, uint32_t *r2, size_t cnt)
481{
482	uint32_t temp;
483
484	temp = *r1;
485	*r1++ = *r2;
486	*r2++ = temp;
487}
488
489/* ARGSUSED */
490static void
491swapp64(uint64_t *r1, uint64_t *r2, size_t cnt)
492{
493	uint64_t temp;
494
495	temp = *r1;
496	*r1++ = *r2;
497	*r2++ = temp;
498}
499
500static void
501swapi(uint32_t *r1, uint32_t *r2, size_t cnt)
502{
503	uint32_t temp;
504
505	/* character by character */
506	while (cnt--) {
507		temp = *r1;
508		*r1++ = *r2;
509		*r2++ = temp;
510	}
511}
512
513static void
514swapb(char *r1, char *r2, size_t cnt)
515{
516	char	temp;
517
518	/* character by character */
519	while (cnt--) {
520		temp = *r1;
521		*r1++ = *r2;
522		*r2++ = temp;
523	}
524}
525