1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23168404Spjd * Use is subject to license terms.
24168404Spjd */
25168404Spjd
26168404Spjd/*
27168404Spjd * AVL - generic AVL tree implementation for kernel use
28168404Spjd *
29168404Spjd * A complete description of AVL trees can be found in many CS textbooks.
30168404Spjd *
31168404Spjd * Here is a very brief overview. An AVL tree is a binary search tree that is
32168404Spjd * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
33168404Spjd * any given node, the left and right subtrees are allowed to differ in height
34168404Spjd * by at most 1 level.
35168404Spjd *
36168404Spjd * This relaxation from a perfectly balanced binary tree allows doing
37168404Spjd * insertion and deletion relatively efficiently. Searching the tree is
38168404Spjd * still a fast operation, roughly O(log(N)).
39168404Spjd *
40168404Spjd * The key to insertion and deletion is a set of tree maniuplations called
41168404Spjd * rotations, which bring unbalanced subtrees back into the semi-balanced state.
42168404Spjd *
43168404Spjd * This implementation of AVL trees has the following peculiarities:
44168404Spjd *
45168404Spjd *	- The AVL specific data structures are physically embedded as fields
46168404Spjd *	  in the "using" data structures.  To maintain generality the code
47168404Spjd *	  must constantly translate between "avl_node_t *" and containing
48168404Spjd *	  data structure "void *"s by adding/subracting the avl_offset.
49168404Spjd *
50168404Spjd *	- Since the AVL data is always embedded in other structures, there is
51168404Spjd *	  no locking or memory allocation in the AVL routines. This must be
52168404Spjd *	  provided for by the enclosing data structure's semantics. Typically,
53168404Spjd *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
54168404Spjd *	  exclusive write lock. Other operations require a read lock.
55168404Spjd *
56168404Spjd *      - The implementation uses iteration instead of explicit recursion,
57168404Spjd *	  since it is intended to run on limited size kernel stacks. Since
58168404Spjd *	  there is no recursion stack present to move "up" in the tree,
59168404Spjd *	  there is an explicit "parent" link in the avl_node_t.
60168404Spjd *
61168404Spjd *      - The left/right children pointers of a node are in an array.
62168404Spjd *	  In the code, variables (instead of constants) are used to represent
63168404Spjd *	  left and right indices.  The implementation is written as if it only
64168404Spjd *	  dealt with left handed manipulations.  By changing the value assigned
65168404Spjd *	  to "left", the code also works for right handed trees.  The
66168404Spjd *	  following variables/terms are frequently used:
67168404Spjd *
68168404Spjd *		int left;	// 0 when dealing with left children,
69168404Spjd *				// 1 for dealing with right children
70168404Spjd *
71168404Spjd *		int left_heavy;	// -1 when left subtree is taller at some node,
72168404Spjd *				// +1 when right subtree is taller
73168404Spjd *
74168404Spjd *		int right;	// will be the opposite of left (0 or 1)
75168404Spjd *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
76168404Spjd *
77168404Spjd *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
78168404Spjd *
79168404Spjd *	  Though it is a little more confusing to read the code, the approach
80168404Spjd *	  allows using half as much code (and hence cache footprint) for tree
81168404Spjd *	  manipulations and eliminates many conditional branches.
82168404Spjd *
83168404Spjd *	- The avl_index_t is an opaque "cookie" used to find nodes at or
84168404Spjd *	  adjacent to where a new value would be inserted in the tree. The value
85168404Spjd *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
86168404Spjd *	  pointer) is set to indicate if that the new node has a value greater
87168404Spjd *	  than the value of the indicated "avl_node_t *".
88168404Spjd */
89168404Spjd
90168404Spjd#include <sys/types.h>
91168404Spjd#include <sys/param.h>
92174046Sjb#include <sys/stdint.h>
93168404Spjd#include <sys/debug.h>
94168404Spjd#include <sys/avl.h>
95168404Spjd
96168404Spjd/*
97168404Spjd * Small arrays to translate between balance (or diff) values and child indeces.
98168404Spjd *
99168404Spjd * Code that deals with binary tree data structures will randomly use
100168404Spjd * left and right children when examining a tree.  C "if()" statements
101168404Spjd * which evaluate randomly suffer from very poor hardware branch prediction.
102168404Spjd * In this code we avoid some of the branch mispredictions by using the
103168404Spjd * following translation arrays. They replace random branches with an
104168404Spjd * additional memory reference. Since the translation arrays are both very
105168404Spjd * small the data should remain efficiently in cache.
106168404Spjd */
107168404Spjdstatic const int  avl_child2balance[2]	= {-1, 1};
108168404Spjdstatic const int  avl_balance2child[]	= {0, 0, 1};
109168404Spjd
110168404Spjd
111168404Spjd/*
112168404Spjd * Walk from one node to the previous valued node (ie. an infix walk
113168404Spjd * towards the left). At any given node we do one of 2 things:
114168404Spjd *
115168404Spjd * - If there is a left child, go to it, then to it's rightmost descendant.
116168404Spjd *
117168404Spjd * - otherwise we return thru parent nodes until we've come from a right child.
118168404Spjd *
119168404Spjd * Return Value:
120168404Spjd * NULL - if at the end of the nodes
121168404Spjd * otherwise next node
122168404Spjd */
123168404Spjdvoid *
124168404Spjdavl_walk(avl_tree_t *tree, void	*oldnode, int left)
125168404Spjd{
126168404Spjd	size_t off = tree->avl_offset;
127168404Spjd	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
128168404Spjd	int right = 1 - left;
129168404Spjd	int was_child;
130168404Spjd
131168404Spjd
132168404Spjd	/*
133168404Spjd	 * nowhere to walk to if tree is empty
134168404Spjd	 */
135168404Spjd	if (node == NULL)
136168404Spjd		return (NULL);
137168404Spjd
138168404Spjd	/*
139168404Spjd	 * Visit the previous valued node. There are two possibilities:
140168404Spjd	 *
141168404Spjd	 * If this node has a left child, go down one left, then all
142168404Spjd	 * the way right.
143168404Spjd	 */
144168404Spjd	if (node->avl_child[left] != NULL) {
145168404Spjd		for (node = node->avl_child[left];
146168404Spjd		    node->avl_child[right] != NULL;
147168404Spjd		    node = node->avl_child[right])
148168404Spjd			;
149168404Spjd	/*
150168404Spjd	 * Otherwise, return thru left children as far as we can.
151168404Spjd	 */
152168404Spjd	} else {
153168404Spjd		for (;;) {
154168404Spjd			was_child = AVL_XCHILD(node);
155168404Spjd			node = AVL_XPARENT(node);
156168404Spjd			if (node == NULL)
157168404Spjd				return (NULL);
158168404Spjd			if (was_child == right)
159168404Spjd				break;
160168404Spjd		}
161168404Spjd	}
162168404Spjd
163168404Spjd	return (AVL_NODE2DATA(node, off));
164168404Spjd}
165168404Spjd
166168404Spjd/*
167168404Spjd * Return the lowest valued node in a tree or NULL.
168168404Spjd * (leftmost child from root of tree)
169168404Spjd */
170168404Spjdvoid *
171168404Spjdavl_first(avl_tree_t *tree)
172168404Spjd{
173168404Spjd	avl_node_t *node;
174168404Spjd	avl_node_t *prev = NULL;
175168404Spjd	size_t off = tree->avl_offset;
176168404Spjd
177168404Spjd	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
178168404Spjd		prev = node;
179168404Spjd
180168404Spjd	if (prev != NULL)
181168404Spjd		return (AVL_NODE2DATA(prev, off));
182168404Spjd	return (NULL);
183168404Spjd}
184168404Spjd
185168404Spjd/*
186168404Spjd * Return the highest valued node in a tree or NULL.
187168404Spjd * (rightmost child from root of tree)
188168404Spjd */
189168404Spjdvoid *
190168404Spjdavl_last(avl_tree_t *tree)
191168404Spjd{
192168404Spjd	avl_node_t *node;
193168404Spjd	avl_node_t *prev = NULL;
194168404Spjd	size_t off = tree->avl_offset;
195168404Spjd
196168404Spjd	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
197168404Spjd		prev = node;
198168404Spjd
199168404Spjd	if (prev != NULL)
200168404Spjd		return (AVL_NODE2DATA(prev, off));
201168404Spjd	return (NULL);
202168404Spjd}
203168404Spjd
204168404Spjd/*
205168404Spjd * Access the node immediately before or after an insertion point.
206168404Spjd *
207168404Spjd * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
208168404Spjd *
209168404Spjd * Return value:
210168404Spjd *	NULL: no node in the given direction
211168404Spjd *	"void *"  of the found tree node
212168404Spjd */
213168404Spjdvoid *
214168404Spjdavl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
215168404Spjd{
216168404Spjd	int child = AVL_INDEX2CHILD(where);
217168404Spjd	avl_node_t *node = AVL_INDEX2NODE(where);
218168404Spjd	void *data;
219168404Spjd	size_t off = tree->avl_offset;
220168404Spjd
221168404Spjd	if (node == NULL) {
222168404Spjd		ASSERT(tree->avl_root == NULL);
223168404Spjd		return (NULL);
224168404Spjd	}
225168404Spjd	data = AVL_NODE2DATA(node, off);
226168404Spjd	if (child != direction)
227168404Spjd		return (data);
228168404Spjd
229168404Spjd	return (avl_walk(tree, data, direction));
230168404Spjd}
231168404Spjd
232168404Spjd
233168404Spjd/*
234168404Spjd * Search for the node which contains "value".  The algorithm is a
235168404Spjd * simple binary tree search.
236168404Spjd *
237168404Spjd * return value:
238168404Spjd *	NULL: the value is not in the AVL tree
239168404Spjd *		*where (if not NULL)  is set to indicate the insertion point
240168404Spjd *	"void *"  of the found tree node
241168404Spjd */
242168404Spjdvoid *
243219089Spjdavl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
244168404Spjd{
245168404Spjd	avl_node_t *node;
246168404Spjd	avl_node_t *prev = NULL;
247168404Spjd	int child = 0;
248168404Spjd	int diff;
249168404Spjd	size_t off = tree->avl_offset;
250168404Spjd
251168404Spjd	for (node = tree->avl_root; node != NULL;
252168404Spjd	    node = node->avl_child[child]) {
253168404Spjd
254168404Spjd		prev = node;
255168404Spjd
256168404Spjd		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
257168404Spjd		ASSERT(-1 <= diff && diff <= 1);
258168404Spjd		if (diff == 0) {
259168404Spjd#ifdef DEBUG
260168404Spjd			if (where != NULL)
261168404Spjd				*where = 0;
262168404Spjd#endif
263168404Spjd			return (AVL_NODE2DATA(node, off));
264168404Spjd		}
265168404Spjd		child = avl_balance2child[1 + diff];
266168404Spjd
267168404Spjd	}
268168404Spjd
269168404Spjd	if (where != NULL)
270168404Spjd		*where = AVL_MKINDEX(prev, child);
271168404Spjd
272168404Spjd	return (NULL);
273168404Spjd}
274168404Spjd
275168404Spjd
276168404Spjd/*
277168404Spjd * Perform a rotation to restore balance at the subtree given by depth.
278168404Spjd *
279168404Spjd * This routine is used by both insertion and deletion. The return value
280168404Spjd * indicates:
281168404Spjd *	 0 : subtree did not change height
282168404Spjd *	!0 : subtree was reduced in height
283168404Spjd *
284168404Spjd * The code is written as if handling left rotations, right rotations are
285168404Spjd * symmetric and handled by swapping values of variables right/left[_heavy]
286168404Spjd *
287168404Spjd * On input balance is the "new" balance at "node". This value is either
288168404Spjd * -2 or +2.
289168404Spjd */
290168404Spjdstatic int
291168404Spjdavl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
292168404Spjd{
293168404Spjd	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
294168404Spjd	int right = 1 - left;
295168404Spjd	int left_heavy = balance >> 1;
296168404Spjd	int right_heavy = -left_heavy;
297168404Spjd	avl_node_t *parent = AVL_XPARENT(node);
298168404Spjd	avl_node_t *child = node->avl_child[left];
299168404Spjd	avl_node_t *cright;
300168404Spjd	avl_node_t *gchild;
301168404Spjd	avl_node_t *gright;
302168404Spjd	avl_node_t *gleft;
303168404Spjd	int which_child = AVL_XCHILD(node);
304168404Spjd	int child_bal = AVL_XBALANCE(child);
305168404Spjd
306168404Spjd	/* BEGIN CSTYLED */
307168404Spjd	/*
308168404Spjd	 * case 1 : node is overly left heavy, the left child is balanced or
309168404Spjd	 * also left heavy. This requires the following rotation.
310168404Spjd	 *
311168404Spjd	 *                   (node bal:-2)
312168404Spjd	 *                    /           \
313168404Spjd	 *                   /             \
314168404Spjd	 *              (child bal:0 or -1)
315168404Spjd	 *              /    \
316168404Spjd	 *             /      \
317168404Spjd	 *                     cright
318168404Spjd	 *
319168404Spjd	 * becomes:
320168404Spjd	 *
321168404Spjd	 *              (child bal:1 or 0)
322168404Spjd	 *              /        \
323168404Spjd	 *             /          \
324168404Spjd	 *                        (node bal:-1 or 0)
325168404Spjd	 *                         /     \
326168404Spjd	 *                        /       \
327168404Spjd	 *                     cright
328168404Spjd	 *
329168404Spjd	 * we detect this situation by noting that child's balance is not
330168404Spjd	 * right_heavy.
331168404Spjd	 */
332168404Spjd	/* END CSTYLED */
333168404Spjd	if (child_bal != right_heavy) {
334168404Spjd
335168404Spjd		/*
336168404Spjd		 * compute new balance of nodes
337168404Spjd		 *
338168404Spjd		 * If child used to be left heavy (now balanced) we reduced
339168404Spjd		 * the height of this sub-tree -- used in "return...;" below
340168404Spjd		 */
341168404Spjd		child_bal += right_heavy; /* adjust towards right */
342168404Spjd
343168404Spjd		/*
344168404Spjd		 * move "cright" to be node's left child
345168404Spjd		 */
346168404Spjd		cright = child->avl_child[right];
347168404Spjd		node->avl_child[left] = cright;
348168404Spjd		if (cright != NULL) {
349168404Spjd			AVL_SETPARENT(cright, node);
350168404Spjd			AVL_SETCHILD(cright, left);
351168404Spjd		}
352168404Spjd
353168404Spjd		/*
354168404Spjd		 * move node to be child's right child
355168404Spjd		 */
356168404Spjd		child->avl_child[right] = node;
357168404Spjd		AVL_SETBALANCE(node, -child_bal);
358168404Spjd		AVL_SETCHILD(node, right);
359168404Spjd		AVL_SETPARENT(node, child);
360168404Spjd
361168404Spjd		/*
362168404Spjd		 * update the pointer into this subtree
363168404Spjd		 */
364168404Spjd		AVL_SETBALANCE(child, child_bal);
365168404Spjd		AVL_SETCHILD(child, which_child);
366168404Spjd		AVL_SETPARENT(child, parent);
367168404Spjd		if (parent != NULL)
368168404Spjd			parent->avl_child[which_child] = child;
369168404Spjd		else
370168404Spjd			tree->avl_root = child;
371168404Spjd
372168404Spjd		return (child_bal == 0);
373168404Spjd	}
374168404Spjd
375168404Spjd	/* BEGIN CSTYLED */
376168404Spjd	/*
377168404Spjd	 * case 2 : When node is left heavy, but child is right heavy we use
378168404Spjd	 * a different rotation.
379168404Spjd	 *
380168404Spjd	 *                   (node b:-2)
381168404Spjd	 *                    /   \
382168404Spjd	 *                   /     \
383168404Spjd	 *                  /       \
384168404Spjd	 *             (child b:+1)
385168404Spjd	 *              /     \
386168404Spjd	 *             /       \
387168404Spjd	 *                   (gchild b: != 0)
388168404Spjd	 *                     /  \
389168404Spjd	 *                    /    \
390168404Spjd	 *                 gleft   gright
391168404Spjd	 *
392168404Spjd	 * becomes:
393168404Spjd	 *
394168404Spjd	 *              (gchild b:0)
395168404Spjd	 *              /       \
396168404Spjd	 *             /         \
397168404Spjd	 *            /           \
398168404Spjd	 *        (child b:?)   (node b:?)
399168404Spjd	 *         /  \          /   \
400168404Spjd	 *        /    \        /     \
401168404Spjd	 *            gleft   gright
402168404Spjd	 *
403168404Spjd	 * computing the new balances is more complicated. As an example:
404168404Spjd	 *	 if gchild was right_heavy, then child is now left heavy
405168404Spjd	 *		else it is balanced
406168404Spjd	 */
407168404Spjd	/* END CSTYLED */
408168404Spjd	gchild = child->avl_child[right];
409168404Spjd	gleft = gchild->avl_child[left];
410168404Spjd	gright = gchild->avl_child[right];
411168404Spjd
412168404Spjd	/*
413168404Spjd	 * move gright to left child of node and
414168404Spjd	 *
415168404Spjd	 * move gleft to right child of node
416168404Spjd	 */
417168404Spjd	node->avl_child[left] = gright;
418168404Spjd	if (gright != NULL) {
419168404Spjd		AVL_SETPARENT(gright, node);
420168404Spjd		AVL_SETCHILD(gright, left);
421168404Spjd	}
422168404Spjd
423168404Spjd	child->avl_child[right] = gleft;
424168404Spjd	if (gleft != NULL) {
425168404Spjd		AVL_SETPARENT(gleft, child);
426168404Spjd		AVL_SETCHILD(gleft, right);
427168404Spjd	}
428168404Spjd
429168404Spjd	/*
430168404Spjd	 * move child to left child of gchild and
431168404Spjd	 *
432168404Spjd	 * move node to right child of gchild and
433168404Spjd	 *
434168404Spjd	 * fixup parent of all this to point to gchild
435168404Spjd	 */
436168404Spjd	balance = AVL_XBALANCE(gchild);
437168404Spjd	gchild->avl_child[left] = child;
438168404Spjd	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
439168404Spjd	AVL_SETPARENT(child, gchild);
440168404Spjd	AVL_SETCHILD(child, left);
441168404Spjd
442168404Spjd	gchild->avl_child[right] = node;
443168404Spjd	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
444168404Spjd	AVL_SETPARENT(node, gchild);
445168404Spjd	AVL_SETCHILD(node, right);
446168404Spjd
447168404Spjd	AVL_SETBALANCE(gchild, 0);
448168404Spjd	AVL_SETPARENT(gchild, parent);
449168404Spjd	AVL_SETCHILD(gchild, which_child);
450168404Spjd	if (parent != NULL)
451168404Spjd		parent->avl_child[which_child] = gchild;
452168404Spjd	else
453168404Spjd		tree->avl_root = gchild;
454168404Spjd
455168404Spjd	return (1);	/* the new tree is always shorter */
456168404Spjd}
457168404Spjd
458168404Spjd
459168404Spjd/*
460168404Spjd * Insert a new node into an AVL tree at the specified (from avl_find()) place.
461168404Spjd *
462168404Spjd * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
463168404Spjd * searches out to the leaf positions.  The avl_index_t indicates the node
464168404Spjd * which will be the parent of the new node.
465168404Spjd *
466168404Spjd * After the node is inserted, a single rotation further up the tree may
467168404Spjd * be necessary to maintain an acceptable AVL balance.
468168404Spjd */
469168404Spjdvoid
470168404Spjdavl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
471168404Spjd{
472168404Spjd	avl_node_t *node;
473168404Spjd	avl_node_t *parent = AVL_INDEX2NODE(where);
474168404Spjd	int old_balance;
475168404Spjd	int new_balance;
476168404Spjd	int which_child = AVL_INDEX2CHILD(where);
477168404Spjd	size_t off = tree->avl_offset;
478168404Spjd
479168404Spjd	ASSERT(tree);
480168404Spjd#ifdef _LP64
481168404Spjd	ASSERT(((uintptr_t)new_data & 0x7) == 0);
482168404Spjd#endif
483168404Spjd
484168404Spjd	node = AVL_DATA2NODE(new_data, off);
485168404Spjd
486168404Spjd	/*
487168404Spjd	 * First, add the node to the tree at the indicated position.
488168404Spjd	 */
489168404Spjd	++tree->avl_numnodes;
490168404Spjd
491168404Spjd	node->avl_child[0] = NULL;
492168404Spjd	node->avl_child[1] = NULL;
493168404Spjd
494168404Spjd	AVL_SETCHILD(node, which_child);
495168404Spjd	AVL_SETBALANCE(node, 0);
496168404Spjd	AVL_SETPARENT(node, parent);
497168404Spjd	if (parent != NULL) {
498168404Spjd		ASSERT(parent->avl_child[which_child] == NULL);
499168404Spjd		parent->avl_child[which_child] = node;
500168404Spjd	} else {
501168404Spjd		ASSERT(tree->avl_root == NULL);
502168404Spjd		tree->avl_root = node;
503168404Spjd	}
504168404Spjd	/*
505168404Spjd	 * Now, back up the tree modifying the balance of all nodes above the
506168404Spjd	 * insertion point. If we get to a highly unbalanced ancestor, we
507168404Spjd	 * need to do a rotation.  If we back out of the tree we are done.
508168404Spjd	 * If we brought any subtree into perfect balance (0), we are also done.
509168404Spjd	 */
510168404Spjd	for (;;) {
511168404Spjd		node = parent;
512168404Spjd		if (node == NULL)
513168404Spjd			return;
514168404Spjd
515168404Spjd		/*
516168404Spjd		 * Compute the new balance
517168404Spjd		 */
518168404Spjd		old_balance = AVL_XBALANCE(node);
519168404Spjd		new_balance = old_balance + avl_child2balance[which_child];
520168404Spjd
521168404Spjd		/*
522168404Spjd		 * If we introduced equal balance, then we are done immediately
523168404Spjd		 */
524168404Spjd		if (new_balance == 0) {
525168404Spjd			AVL_SETBALANCE(node, 0);
526168404Spjd			return;
527168404Spjd		}
528168404Spjd
529168404Spjd		/*
530168404Spjd		 * If both old and new are not zero we went
531168404Spjd		 * from -1 to -2 balance, do a rotation.
532168404Spjd		 */
533168404Spjd		if (old_balance != 0)
534168404Spjd			break;
535168404Spjd
536168404Spjd		AVL_SETBALANCE(node, new_balance);
537168404Spjd		parent = AVL_XPARENT(node);
538168404Spjd		which_child = AVL_XCHILD(node);
539168404Spjd	}
540168404Spjd
541168404Spjd	/*
542168404Spjd	 * perform a rotation to fix the tree and return
543168404Spjd	 */
544168404Spjd	(void) avl_rotation(tree, node, new_balance);
545168404Spjd}
546168404Spjd
547168404Spjd/*
548168404Spjd * Insert "new_data" in "tree" in the given "direction" either after or
549168404Spjd * before (AVL_AFTER, AVL_BEFORE) the data "here".
550168404Spjd *
551168404Spjd * Insertions can only be done at empty leaf points in the tree, therefore
552168404Spjd * if the given child of the node is already present we move to either
553168404Spjd * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
554168404Spjd * every other node in the tree is a leaf, this always works.
555168404Spjd *
556168404Spjd * To help developers using this interface, we assert that the new node
557168404Spjd * is correctly ordered at every step of the way in DEBUG kernels.
558168404Spjd */
559168404Spjdvoid
560168404Spjdavl_insert_here(
561168404Spjd	avl_tree_t *tree,
562168404Spjd	void *new_data,
563168404Spjd	void *here,
564168404Spjd	int direction)
565168404Spjd{
566168404Spjd	avl_node_t *node;
567168404Spjd	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
568168404Spjd#ifdef DEBUG
569168404Spjd	int diff;
570168404Spjd#endif
571168404Spjd
572168404Spjd	ASSERT(tree != NULL);
573168404Spjd	ASSERT(new_data != NULL);
574168404Spjd	ASSERT(here != NULL);
575168404Spjd	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
576168404Spjd
577168404Spjd	/*
578168404Spjd	 * If corresponding child of node is not NULL, go to the neighboring
579168404Spjd	 * node and reverse the insertion direction.
580168404Spjd	 */
581168404Spjd	node = AVL_DATA2NODE(here, tree->avl_offset);
582168404Spjd
583168404Spjd#ifdef DEBUG
584168404Spjd	diff = tree->avl_compar(new_data, here);
585168404Spjd	ASSERT(-1 <= diff && diff <= 1);
586168404Spjd	ASSERT(diff != 0);
587168404Spjd	ASSERT(diff > 0 ? child == 1 : child == 0);
588168404Spjd#endif
589168404Spjd
590168404Spjd	if (node->avl_child[child] != NULL) {
591168404Spjd		node = node->avl_child[child];
592168404Spjd		child = 1 - child;
593168404Spjd		while (node->avl_child[child] != NULL) {
594168404Spjd#ifdef DEBUG
595168404Spjd			diff = tree->avl_compar(new_data,
596168404Spjd			    AVL_NODE2DATA(node, tree->avl_offset));
597168404Spjd			ASSERT(-1 <= diff && diff <= 1);
598168404Spjd			ASSERT(diff != 0);
599168404Spjd			ASSERT(diff > 0 ? child == 1 : child == 0);
600168404Spjd#endif
601168404Spjd			node = node->avl_child[child];
602168404Spjd		}
603168404Spjd#ifdef DEBUG
604168404Spjd		diff = tree->avl_compar(new_data,
605168404Spjd		    AVL_NODE2DATA(node, tree->avl_offset));
606168404Spjd		ASSERT(-1 <= diff && diff <= 1);
607168404Spjd		ASSERT(diff != 0);
608168404Spjd		ASSERT(diff > 0 ? child == 1 : child == 0);
609168404Spjd#endif
610168404Spjd	}
611168404Spjd	ASSERT(node->avl_child[child] == NULL);
612168404Spjd
613168404Spjd	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
614168404Spjd}
615168404Spjd
616168404Spjd/*
617168404Spjd * Add a new node to an AVL tree.
618168404Spjd */
619168404Spjdvoid
620168404Spjdavl_add(avl_tree_t *tree, void *new_node)
621168404Spjd{
622168404Spjd	avl_index_t where;
623168404Spjd
624168404Spjd	/*
625168404Spjd	 * This is unfortunate.  We want to call panic() here, even for
626168404Spjd	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
627168404Spjd	 * in libc or else the rtld build process gets confused.  So, all we can
628168404Spjd	 * do in userland is resort to a normal ASSERT().
629168404Spjd	 */
630168404Spjd	if (avl_find(tree, new_node, &where) != NULL)
631168404Spjd#ifdef _KERNEL
632168404Spjd		panic("avl_find() succeeded inside avl_add()");
633168404Spjd#else
634168404Spjd		ASSERT(0);
635168404Spjd#endif
636168404Spjd	avl_insert(tree, new_node, where);
637168404Spjd}
638168404Spjd
639168404Spjd/*
640168404Spjd * Delete a node from the AVL tree.  Deletion is similar to insertion, but
641168404Spjd * with 2 complications.
642168404Spjd *
643168404Spjd * First, we may be deleting an interior node. Consider the following subtree:
644168404Spjd *
645168404Spjd *     d           c            c
646168404Spjd *    / \         / \          / \
647168404Spjd *   b   e       b   e        b   e
648168404Spjd *  / \	        / \          /
649168404Spjd * a   c       a            a
650168404Spjd *
651168404Spjd * When we are deleting node (d), we find and bring up an adjacent valued leaf
652168404Spjd * node, say (c), to take the interior node's place. In the code this is
653168404Spjd * handled by temporarily swapping (d) and (c) in the tree and then using
654168404Spjd * common code to delete (d) from the leaf position.
655168404Spjd *
656168404Spjd * Secondly, an interior deletion from a deep tree may require more than one
657168404Spjd * rotation to fix the balance. This is handled by moving up the tree through
658168404Spjd * parents and applying rotations as needed. The return value from
659168404Spjd * avl_rotation() is used to detect when a subtree did not change overall
660168404Spjd * height due to a rotation.
661168404Spjd */
662168404Spjdvoid
663168404Spjdavl_remove(avl_tree_t *tree, void *data)
664168404Spjd{
665168404Spjd	avl_node_t *delete;
666168404Spjd	avl_node_t *parent;
667168404Spjd	avl_node_t *node;
668168404Spjd	avl_node_t tmp;
669168404Spjd	int old_balance;
670168404Spjd	int new_balance;
671168404Spjd	int left;
672168404Spjd	int right;
673168404Spjd	int which_child;
674168404Spjd	size_t off = tree->avl_offset;
675168404Spjd
676168404Spjd	ASSERT(tree);
677168404Spjd
678168404Spjd	delete = AVL_DATA2NODE(data, off);
679168404Spjd
680168404Spjd	/*
681168404Spjd	 * Deletion is easiest with a node that has at most 1 child.
682168404Spjd	 * We swap a node with 2 children with a sequentially valued
683168404Spjd	 * neighbor node. That node will have at most 1 child. Note this
684168404Spjd	 * has no effect on the ordering of the remaining nodes.
685168404Spjd	 *
686168404Spjd	 * As an optimization, we choose the greater neighbor if the tree
687168404Spjd	 * is right heavy, otherwise the left neighbor. This reduces the
688168404Spjd	 * number of rotations needed.
689168404Spjd	 */
690168404Spjd	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
691168404Spjd
692168404Spjd		/*
693168404Spjd		 * choose node to swap from whichever side is taller
694168404Spjd		 */
695168404Spjd		old_balance = AVL_XBALANCE(delete);
696168404Spjd		left = avl_balance2child[old_balance + 1];
697168404Spjd		right = 1 - left;
698168404Spjd
699168404Spjd		/*
700168404Spjd		 * get to the previous value'd node
701168404Spjd		 * (down 1 left, as far as possible right)
702168404Spjd		 */
703168404Spjd		for (node = delete->avl_child[left];
704168404Spjd		    node->avl_child[right] != NULL;
705168404Spjd		    node = node->avl_child[right])
706168404Spjd			;
707168404Spjd
708168404Spjd		/*
709168404Spjd		 * create a temp placeholder for 'node'
710168404Spjd		 * move 'node' to delete's spot in the tree
711168404Spjd		 */
712168404Spjd		tmp = *node;
713168404Spjd
714168404Spjd		*node = *delete;
715168404Spjd		if (node->avl_child[left] == node)
716168404Spjd			node->avl_child[left] = &tmp;
717168404Spjd
718168404Spjd		parent = AVL_XPARENT(node);
719168404Spjd		if (parent != NULL)
720168404Spjd			parent->avl_child[AVL_XCHILD(node)] = node;
721168404Spjd		else
722168404Spjd			tree->avl_root = node;
723168404Spjd		AVL_SETPARENT(node->avl_child[left], node);
724168404Spjd		AVL_SETPARENT(node->avl_child[right], node);
725168404Spjd
726168404Spjd		/*
727168404Spjd		 * Put tmp where node used to be (just temporary).
728168404Spjd		 * It always has a parent and at most 1 child.
729168404Spjd		 */
730168404Spjd		delete = &tmp;
731168404Spjd		parent = AVL_XPARENT(delete);
732168404Spjd		parent->avl_child[AVL_XCHILD(delete)] = delete;
733168404Spjd		which_child = (delete->avl_child[1] != 0);
734168404Spjd		if (delete->avl_child[which_child] != NULL)
735168404Spjd			AVL_SETPARENT(delete->avl_child[which_child], delete);
736168404Spjd	}
737168404Spjd
738168404Spjd
739168404Spjd	/*
740168404Spjd	 * Here we know "delete" is at least partially a leaf node. It can
741168404Spjd	 * be easily removed from the tree.
742168404Spjd	 */
743168404Spjd	ASSERT(tree->avl_numnodes > 0);
744168404Spjd	--tree->avl_numnodes;
745168404Spjd	parent = AVL_XPARENT(delete);
746168404Spjd	which_child = AVL_XCHILD(delete);
747168404Spjd	if (delete->avl_child[0] != NULL)
748168404Spjd		node = delete->avl_child[0];
749168404Spjd	else
750168404Spjd		node = delete->avl_child[1];
751168404Spjd
752168404Spjd	/*
753168404Spjd	 * Connect parent directly to node (leaving out delete).
754168404Spjd	 */
755168404Spjd	if (node != NULL) {
756168404Spjd		AVL_SETPARENT(node, parent);
757168404Spjd		AVL_SETCHILD(node, which_child);
758168404Spjd	}
759168404Spjd	if (parent == NULL) {
760168404Spjd		tree->avl_root = node;
761168404Spjd		return;
762168404Spjd	}
763168404Spjd	parent->avl_child[which_child] = node;
764168404Spjd
765168404Spjd
766168404Spjd	/*
767168404Spjd	 * Since the subtree is now shorter, begin adjusting parent balances
768168404Spjd	 * and performing any needed rotations.
769168404Spjd	 */
770168404Spjd	do {
771168404Spjd
772168404Spjd		/*
773168404Spjd		 * Move up the tree and adjust the balance
774168404Spjd		 *
775168404Spjd		 * Capture the parent and which_child values for the next
776168404Spjd		 * iteration before any rotations occur.
777168404Spjd		 */
778168404Spjd		node = parent;
779168404Spjd		old_balance = AVL_XBALANCE(node);
780168404Spjd		new_balance = old_balance - avl_child2balance[which_child];
781168404Spjd		parent = AVL_XPARENT(node);
782168404Spjd		which_child = AVL_XCHILD(node);
783168404Spjd
784168404Spjd		/*
785168404Spjd		 * If a node was in perfect balance but isn't anymore then
786168404Spjd		 * we can stop, since the height didn't change above this point
787168404Spjd		 * due to a deletion.
788168404Spjd		 */
789168404Spjd		if (old_balance == 0) {
790168404Spjd			AVL_SETBALANCE(node, new_balance);
791168404Spjd			break;
792168404Spjd		}
793168404Spjd
794168404Spjd		/*
795168404Spjd		 * If the new balance is zero, we don't need to rotate
796168404Spjd		 * else
797168404Spjd		 * need a rotation to fix the balance.
798168404Spjd		 * If the rotation doesn't change the height
799168404Spjd		 * of the sub-tree we have finished adjusting.
800168404Spjd		 */
801168404Spjd		if (new_balance == 0)
802168404Spjd			AVL_SETBALANCE(node, new_balance);
803168404Spjd		else if (!avl_rotation(tree, node, new_balance))
804168404Spjd			break;
805168404Spjd	} while (parent != NULL);
806168404Spjd}
807168404Spjd
808185029Spjd#define	AVL_REINSERT(tree, obj)		\
809185029Spjd	avl_remove((tree), (obj));	\
810185029Spjd	avl_add((tree), (obj))
811185029Spjd
812185029Spjdboolean_t
813185029Spjdavl_update_lt(avl_tree_t *t, void *obj)
814185029Spjd{
815185029Spjd	void *neighbor;
816185029Spjd
817185029Spjd	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
818185029Spjd	    (t->avl_compar(obj, neighbor) <= 0));
819185029Spjd
820185029Spjd	neighbor = AVL_PREV(t, obj);
821185029Spjd	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
822185029Spjd		AVL_REINSERT(t, obj);
823185029Spjd		return (B_TRUE);
824185029Spjd	}
825185029Spjd
826185029Spjd	return (B_FALSE);
827185029Spjd}
828185029Spjd
829185029Spjdboolean_t
830185029Spjdavl_update_gt(avl_tree_t *t, void *obj)
831185029Spjd{
832185029Spjd	void *neighbor;
833185029Spjd
834185029Spjd	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
835185029Spjd	    (t->avl_compar(obj, neighbor) >= 0));
836185029Spjd
837185029Spjd	neighbor = AVL_NEXT(t, obj);
838185029Spjd	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
839185029Spjd		AVL_REINSERT(t, obj);
840185029Spjd		return (B_TRUE);
841185029Spjd	}
842185029Spjd
843185029Spjd	return (B_FALSE);
844185029Spjd}
845185029Spjd
846185029Spjdboolean_t
847185029Spjdavl_update(avl_tree_t *t, void *obj)
848185029Spjd{
849185029Spjd	void *neighbor;
850185029Spjd
851185029Spjd	neighbor = AVL_PREV(t, obj);
852185029Spjd	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
853185029Spjd		AVL_REINSERT(t, obj);
854185029Spjd		return (B_TRUE);
855185029Spjd	}
856185029Spjd
857185029Spjd	neighbor = AVL_NEXT(t, obj);
858185029Spjd	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
859185029Spjd		AVL_REINSERT(t, obj);
860185029Spjd		return (B_TRUE);
861185029Spjd	}
862185029Spjd
863185029Spjd	return (B_FALSE);
864185029Spjd}
865185029Spjd
866168404Spjd/*
867168404Spjd * initialize a new AVL tree
868168404Spjd */
869168404Spjdvoid
870168404Spjdavl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
871168404Spjd    size_t size, size_t offset)
872168404Spjd{
873168404Spjd	ASSERT(tree);
874168404Spjd	ASSERT(compar);
875168404Spjd	ASSERT(size > 0);
876168404Spjd	ASSERT(size >= offset + sizeof (avl_node_t));
877168404Spjd#ifdef _LP64
878168404Spjd	ASSERT((offset & 0x7) == 0);
879168404Spjd#endif
880168404Spjd
881168404Spjd	tree->avl_compar = compar;
882168404Spjd	tree->avl_root = NULL;
883168404Spjd	tree->avl_numnodes = 0;
884168404Spjd	tree->avl_size = size;
885168404Spjd	tree->avl_offset = offset;
886168404Spjd}
887168404Spjd
888168404Spjd/*
889168404Spjd * Delete a tree.
890168404Spjd */
891168404Spjd/* ARGSUSED */
892168404Spjdvoid
893168404Spjdavl_destroy(avl_tree_t *tree)
894168404Spjd{
895168404Spjd	ASSERT(tree);
896168404Spjd	ASSERT(tree->avl_numnodes == 0);
897168404Spjd	ASSERT(tree->avl_root == NULL);
898168404Spjd}
899168404Spjd
900168404Spjd
901168404Spjd/*
902168404Spjd * Return the number of nodes in an AVL tree.
903168404Spjd */
904168404Spjdulong_t
905168404Spjdavl_numnodes(avl_tree_t *tree)
906168404Spjd{
907168404Spjd	ASSERT(tree);
908168404Spjd	return (tree->avl_numnodes);
909168404Spjd}
910168404Spjd
911185029Spjdboolean_t
912185029Spjdavl_is_empty(avl_tree_t *tree)
913185029Spjd{
914185029Spjd	ASSERT(tree);
915185029Spjd	return (tree->avl_numnodes == 0);
916185029Spjd}
917168404Spjd
918168404Spjd#define	CHILDBIT	(1L)
919168404Spjd
920168404Spjd/*
921168404Spjd * Post-order tree walk used to visit all tree nodes and destroy the tree
922168404Spjd * in post order. This is used for destroying a tree w/o paying any cost
923168404Spjd * for rebalancing it.
924168404Spjd *
925168404Spjd * example:
926168404Spjd *
927168404Spjd *	void *cookie = NULL;
928168404Spjd *	my_data_t *node;
929168404Spjd *
930168404Spjd *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
931168404Spjd *		free(node);
932168404Spjd *	avl_destroy(tree);
933168404Spjd *
934168404Spjd * The cookie is really an avl_node_t to the current node's parent and
935168404Spjd * an indication of which child you looked at last.
936168404Spjd *
937168404Spjd * On input, a cookie value of CHILDBIT indicates the tree is done.
938168404Spjd */
939168404Spjdvoid *
940168404Spjdavl_destroy_nodes(avl_tree_t *tree, void **cookie)
941168404Spjd{
942168404Spjd	avl_node_t	*node;
943168404Spjd	avl_node_t	*parent;
944168404Spjd	int		child;
945168404Spjd	void		*first;
946168404Spjd	size_t		off = tree->avl_offset;
947168404Spjd
948168404Spjd	/*
949168404Spjd	 * Initial calls go to the first node or it's right descendant.
950168404Spjd	 */
951168404Spjd	if (*cookie == NULL) {
952168404Spjd		first = avl_first(tree);
953168404Spjd
954168404Spjd		/*
955168404Spjd		 * deal with an empty tree
956168404Spjd		 */
957168404Spjd		if (first == NULL) {
958168404Spjd			*cookie = (void *)CHILDBIT;
959168404Spjd			return (NULL);
960168404Spjd		}
961168404Spjd
962168404Spjd		node = AVL_DATA2NODE(first, off);
963168404Spjd		parent = AVL_XPARENT(node);
964168404Spjd		goto check_right_side;
965168404Spjd	}
966168404Spjd
967168404Spjd	/*
968168404Spjd	 * If there is no parent to return to we are done.
969168404Spjd	 */
970168404Spjd	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
971168404Spjd	if (parent == NULL) {
972168404Spjd		if (tree->avl_root != NULL) {
973168404Spjd			ASSERT(tree->avl_numnodes == 1);
974168404Spjd			tree->avl_root = NULL;
975168404Spjd			tree->avl_numnodes = 0;
976168404Spjd		}
977168404Spjd		return (NULL);
978168404Spjd	}
979168404Spjd
980168404Spjd	/*
981168404Spjd	 * Remove the child pointer we just visited from the parent and tree.
982168404Spjd	 */
983168404Spjd	child = (uintptr_t)(*cookie) & CHILDBIT;
984168404Spjd	parent->avl_child[child] = NULL;
985168404Spjd	ASSERT(tree->avl_numnodes > 1);
986168404Spjd	--tree->avl_numnodes;
987168404Spjd
988168404Spjd	/*
989168404Spjd	 * If we just did a right child or there isn't one, go up to parent.
990168404Spjd	 */
991168404Spjd	if (child == 1 || parent->avl_child[1] == NULL) {
992168404Spjd		node = parent;
993168404Spjd		parent = AVL_XPARENT(parent);
994168404Spjd		goto done;
995168404Spjd	}
996168404Spjd
997168404Spjd	/*
998168404Spjd	 * Do parent's right child, then leftmost descendent.
999168404Spjd	 */
1000168404Spjd	node = parent->avl_child[1];
1001168404Spjd	while (node->avl_child[0] != NULL) {
1002168404Spjd		parent = node;
1003168404Spjd		node = node->avl_child[0];
1004168404Spjd	}
1005168404Spjd
1006168404Spjd	/*
1007168404Spjd	 * If here, we moved to a left child. It may have one
1008168404Spjd	 * child on the right (when balance == +1).
1009168404Spjd	 */
1010168404Spjdcheck_right_side:
1011168404Spjd	if (node->avl_child[1] != NULL) {
1012168404Spjd		ASSERT(AVL_XBALANCE(node) == 1);
1013168404Spjd		parent = node;
1014168404Spjd		node = node->avl_child[1];
1015168404Spjd		ASSERT(node->avl_child[0] == NULL &&
1016168404Spjd		    node->avl_child[1] == NULL);
1017168404Spjd	} else {
1018168404Spjd		ASSERT(AVL_XBALANCE(node) <= 0);
1019168404Spjd	}
1020168404Spjd
1021168404Spjddone:
1022168404Spjd	if (parent == NULL) {
1023168404Spjd		*cookie = (void *)CHILDBIT;
1024168404Spjd		ASSERT(node == tree->avl_root);
1025168404Spjd	} else {
1026168404Spjd		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
1027168404Spjd	}
1028168404Spjd
1029168404Spjd	return (AVL_NODE2DATA(node, off));
1030168404Spjd}
1031