1/* Copyright (C) 1995, 1996, 1997, 2000, 2006 Free Software Foundation, Inc. 2 Contributed by Bernd Schmidt <crux@Pool.Informatik.RWTH-Aachen.DE>, 1997. 3 4 NOTE: The canonical source of this file is maintained with the GNU C 5 Library. Bugs can be reported to bug-glibc@gnu.org. 6 7 This program is free software; you can redistribute it and/or modify it 8 under the terms of the GNU Library General Public License as published 9 by the Free Software Foundation; either version 2, or (at your option) 10 any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 Library General Public License for more details. 16 17 You should have received a copy of the GNU Library General Public 18 License along with this program; if not, write to the Free Software 19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 20 USA. */ 21 22/* Tree search for red/black trees. 23 The algorithm for adding nodes is taken from one of the many "Algorithms" 24 books by Robert Sedgewick, although the implementation differs. 25 The algorithm for deleting nodes can probably be found in a book named 26 "Introduction to Algorithms" by Cormen/Leiserson/Rivest. At least that's 27 the book that my professor took most algorithms from during the "Data 28 Structures" course... 29 30 Totally public domain. */ 31 32/* Red/black trees are binary trees in which the edges are colored either red 33 or black. They have the following properties: 34 1. The number of black edges on every path from the root to a leaf is 35 constant. 36 2. No two red edges are adjacent. 37 Therefore there is an upper bound on the length of every path, it's 38 O(log n) where n is the number of nodes in the tree. No path can be longer 39 than 1+2*P where P is the length of the shortest path in the tree. 40 Useful for the implementation: 41 3. If one of the children of a node is NULL, then the other one is red 42 (if it exists). 43 44 In the implementation, not the edges are colored, but the nodes. The color 45 interpreted as the color of the edge leading to this node. The color is 46 meaningless for the root node, but we color the root node black for 47 convenience. All added nodes are red initially. 48 49 Adding to a red/black tree is rather easy. The right place is searched 50 with a usual binary tree search. Additionally, whenever a node N is 51 reached that has two red successors, the successors are colored black and 52 the node itself colored red. This moves red edges up the tree where they 53 pose less of a problem once we get to really insert the new node. Changing 54 N's color to red may violate rule 2, however, so rotations may become 55 necessary to restore the invariants. Adding a new red leaf may violate 56 the same rule, so afterwards an additional check is run and the tree 57 possibly rotated. 58 59 Deleting is hairy. There are mainly two nodes involved: the node to be 60 deleted (n1), and another node that is to be unchained from the tree (n2). 61 If n1 has a successor (the node with a smallest key that is larger than 62 n1), then the successor becomes n2 and its contents are copied into n1, 63 otherwise n1 becomes n2. 64 Unchaining a node may violate rule 1: if n2 is black, one subtree is 65 missing one black edge afterwards. The algorithm must try to move this 66 error upwards towards the root, so that the subtree that does not have 67 enough black edges becomes the whole tree. Once that happens, the error 68 has disappeared. It may not be necessary to go all the way up, since it 69 is possible that rotations and recoloring can fix the error before that. 70 71 Although the deletion algorithm must walk upwards through the tree, we 72 do not store parent pointers in the nodes. Instead, delete allocates a 73 small array of parent pointers and fills it while descending the tree. 74 Since we know that the length of a path is O(log n), where n is the number 75 of nodes, this is likely to use less memory. */ 76 77/* Tree rotations look like this: 78 A C 79 / \ / \ 80 B C A G 81 / \ / \ --> / \ 82 D E F G B F 83 / \ 84 D E 85 86 In this case, A has been rotated left. This preserves the ordering of the 87 binary tree. */ 88 89#include <config.h> 90 91/* Specification. */ 92#ifdef IN_LIBINTL 93# include "tsearch.h" 94#else 95# include <search.h> 96#endif 97 98#include <stdlib.h> 99 100typedef int (*__compar_fn_t) (const void *, const void *); 101typedef void (*__action_fn_t) (const void *, VISIT, int); 102 103#ifndef weak_alias 104# define __tsearch tsearch 105# define __tfind tfind 106# define __tdelete tdelete 107# define __twalk twalk 108#endif 109 110#ifndef internal_function 111/* Inside GNU libc we mark some function in a special way. In other 112 environments simply ignore the marking. */ 113# define internal_function 114#endif 115 116typedef struct node_t 117{ 118 /* Callers expect this to be the first element in the structure - do not 119 move! */ 120 const void *key; 121 struct node_t *left; 122 struct node_t *right; 123 unsigned int red:1; 124} *node; 125typedef const struct node_t *const_node; 126 127#undef DEBUGGING 128 129#ifdef DEBUGGING 130 131/* Routines to check tree invariants. */ 132 133#include <assert.h> 134 135#define CHECK_TREE(a) check_tree(a) 136 137static void 138check_tree_recurse (node p, int d_sofar, int d_total) 139{ 140 if (p == NULL) 141 { 142 assert (d_sofar == d_total); 143 return; 144 } 145 146 check_tree_recurse (p->left, d_sofar + (p->left && !p->left->red), d_total); 147 check_tree_recurse (p->right, d_sofar + (p->right && !p->right->red), d_total); 148 if (p->left) 149 assert (!(p->left->red && p->red)); 150 if (p->right) 151 assert (!(p->right->red && p->red)); 152} 153 154static void 155check_tree (node root) 156{ 157 int cnt = 0; 158 node p; 159 if (root == NULL) 160 return; 161 root->red = 0; 162 for(p = root->left; p; p = p->left) 163 cnt += !p->red; 164 check_tree_recurse (root, 0, cnt); 165} 166 167 168#else 169 170#define CHECK_TREE(a) 171 172#endif 173 174/* Possibly "split" a node with two red successors, and/or fix up two red 175 edges in a row. ROOTP is a pointer to the lowest node we visited, PARENTP 176 and GPARENTP pointers to its parent/grandparent. P_R and GP_R contain the 177 comparison values that determined which way was taken in the tree to reach 178 ROOTP. MODE is 1 if we need not do the split, but must check for two red 179 edges between GPARENTP and ROOTP. */ 180static void 181maybe_split_for_insert (node *rootp, node *parentp, node *gparentp, 182 int p_r, int gp_r, int mode) 183{ 184 node root = *rootp; 185 node *rp, *lp; 186 rp = &(*rootp)->right; 187 lp = &(*rootp)->left; 188 189 /* See if we have to split this node (both successors red). */ 190 if (mode == 1 191 || ((*rp) != NULL && (*lp) != NULL && (*rp)->red && (*lp)->red)) 192 { 193 /* This node becomes red, its successors black. */ 194 root->red = 1; 195 if (*rp) 196 (*rp)->red = 0; 197 if (*lp) 198 (*lp)->red = 0; 199 200 /* If the parent of this node is also red, we have to do 201 rotations. */ 202 if (parentp != NULL && (*parentp)->red) 203 { 204 node gp = *gparentp; 205 node p = *parentp; 206 /* There are two main cases: 207 1. The edge types (left or right) of the two red edges differ. 208 2. Both red edges are of the same type. 209 There exist two symmetries of each case, so there is a total of 210 4 cases. */ 211 if ((p_r > 0) != (gp_r > 0)) 212 { 213 /* Put the child at the top of the tree, with its parent 214 and grandparent as successors. */ 215 p->red = 1; 216 gp->red = 1; 217 root->red = 0; 218 if (p_r < 0) 219 { 220 /* Child is left of parent. */ 221 p->left = *rp; 222 *rp = p; 223 gp->right = *lp; 224 *lp = gp; 225 } 226 else 227 { 228 /* Child is right of parent. */ 229 p->right = *lp; 230 *lp = p; 231 gp->left = *rp; 232 *rp = gp; 233 } 234 *gparentp = root; 235 } 236 else 237 { 238 *gparentp = *parentp; 239 /* Parent becomes the top of the tree, grandparent and 240 child are its successors. */ 241 p->red = 0; 242 gp->red = 1; 243 if (p_r < 0) 244 { 245 /* Left edges. */ 246 gp->left = p->right; 247 p->right = gp; 248 } 249 else 250 { 251 /* Right edges. */ 252 gp->right = p->left; 253 p->left = gp; 254 } 255 } 256 } 257 } 258} 259 260/* Find or insert datum into search tree. 261 KEY is the key to be located, ROOTP is the address of tree root, 262 COMPAR the ordering function. */ 263void * 264__tsearch (const void *key, void **vrootp, __compar_fn_t compar) 265{ 266 node q; 267 node *parentp = NULL, *gparentp = NULL; 268 node *rootp = (node *) vrootp; 269 node *nextp; 270 int r = 0, p_r = 0, gp_r = 0; /* No they might not, Mr Compiler. */ 271 272 if (rootp == NULL) 273 return NULL; 274 275 /* This saves some additional tests below. */ 276 if (*rootp != NULL) 277 (*rootp)->red = 0; 278 279 CHECK_TREE (*rootp); 280 281 nextp = rootp; 282 while (*nextp != NULL) 283 { 284 node root = *rootp; 285 r = (*compar) (key, root->key); 286 if (r == 0) 287 return root; 288 289 maybe_split_for_insert (rootp, parentp, gparentp, p_r, gp_r, 0); 290 /* If that did any rotations, parentp and gparentp are now garbage. 291 That doesn't matter, because the values they contain are never 292 used again in that case. */ 293 294 nextp = r < 0 ? &root->left : &root->right; 295 if (*nextp == NULL) 296 break; 297 298 gparentp = parentp; 299 parentp = rootp; 300 rootp = nextp; 301 302 gp_r = p_r; 303 p_r = r; 304 } 305 306 q = (struct node_t *) malloc (sizeof (struct node_t)); 307 if (q != NULL) 308 { 309 *nextp = q; /* link new node to old */ 310 q->key = key; /* initialize new node */ 311 q->red = 1; 312 q->left = q->right = NULL; 313 314 if (nextp != rootp) 315 /* There may be two red edges in a row now, which we must avoid by 316 rotating the tree. */ 317 maybe_split_for_insert (nextp, rootp, parentp, r, p_r, 1); 318 } 319 320 return q; 321} 322#ifdef weak_alias 323weak_alias (__tsearch, tsearch) 324#endif 325 326 327/* Find datum in search tree. 328 KEY is the key to be located, ROOTP is the address of tree root, 329 COMPAR the ordering function. */ 330void * 331__tfind (key, vrootp, compar) 332 const void *key; 333 void *const *vrootp; 334 __compar_fn_t compar; 335{ 336 node *rootp = (node *) vrootp; 337 338 if (rootp == NULL) 339 return NULL; 340 341 CHECK_TREE (*rootp); 342 343 while (*rootp != NULL) 344 { 345 node root = *rootp; 346 int r; 347 348 r = (*compar) (key, root->key); 349 if (r == 0) 350 return root; 351 352 rootp = r < 0 ? &root->left : &root->right; 353 } 354 return NULL; 355} 356#ifdef weak_alias 357weak_alias (__tfind, tfind) 358#endif 359 360 361/* Delete node with given key. 362 KEY is the key to be deleted, ROOTP is the address of the root of tree, 363 COMPAR the comparison function. */ 364void * 365__tdelete (const void *key, void **vrootp, __compar_fn_t compar) 366{ 367 node p, q, r, retval; 368 int cmp; 369 node *rootp = (node *) vrootp; 370 node root, unchained; 371 /* Stack of nodes so we remember the parents without recursion. It's 372 _very_ unlikely that there are paths longer than 40 nodes. The tree 373 would need to have around 250.000 nodes. */ 374 int stacksize = 100; 375 int sp = 0; 376 node *nodestack[100]; 377 378 if (rootp == NULL) 379 return NULL; 380 p = *rootp; 381 if (p == NULL) 382 return NULL; 383 384 CHECK_TREE (p); 385 386 while ((cmp = (*compar) (key, (*rootp)->key)) != 0) 387 { 388 if (sp == stacksize) 389 abort (); 390 391 nodestack[sp++] = rootp; 392 p = *rootp; 393 rootp = ((cmp < 0) 394 ? &(*rootp)->left 395 : &(*rootp)->right); 396 if (*rootp == NULL) 397 return NULL; 398 } 399 400 /* This is bogus if the node to be deleted is the root... this routine 401 really should return an integer with 0 for success, -1 for failure 402 and errno = ESRCH or something. */ 403 retval = p; 404 405 /* We don't unchain the node we want to delete. Instead, we overwrite 406 it with its successor and unchain the successor. If there is no 407 successor, we really unchain the node to be deleted. */ 408 409 root = *rootp; 410 411 r = root->right; 412 q = root->left; 413 414 if (q == NULL || r == NULL) 415 unchained = root; 416 else 417 { 418 node *parent = rootp, *up = &root->right; 419 for (;;) 420 { 421 if (sp == stacksize) 422 abort (); 423 nodestack[sp++] = parent; 424 parent = up; 425 if ((*up)->left == NULL) 426 break; 427 up = &(*up)->left; 428 } 429 unchained = *up; 430 } 431 432 /* We know that either the left or right successor of UNCHAINED is NULL. 433 R becomes the other one, it is chained into the parent of UNCHAINED. */ 434 r = unchained->left; 435 if (r == NULL) 436 r = unchained->right; 437 if (sp == 0) 438 *rootp = r; 439 else 440 { 441 q = *nodestack[sp-1]; 442 if (unchained == q->right) 443 q->right = r; 444 else 445 q->left = r; 446 } 447 448 if (unchained != root) 449 root->key = unchained->key; 450 if (!unchained->red) 451 { 452 /* Now we lost a black edge, which means that the number of black 453 edges on every path is no longer constant. We must balance the 454 tree. */ 455 /* NODESTACK now contains all parents of R. R is likely to be NULL 456 in the first iteration. */ 457 /* NULL nodes are considered black throughout - this is necessary for 458 correctness. */ 459 while (sp > 0 && (r == NULL || !r->red)) 460 { 461 node *pp = nodestack[sp - 1]; 462 p = *pp; 463 /* Two symmetric cases. */ 464 if (r == p->left) 465 { 466 /* Q is R's brother, P is R's parent. The subtree with root 467 R has one black edge less than the subtree with root Q. */ 468 q = p->right; 469 if (q->red) 470 { 471 /* If Q is red, we know that P is black. We rotate P left 472 so that Q becomes the top node in the tree, with P below 473 it. P is colored red, Q is colored black. 474 This action does not change the black edge count for any 475 leaf in the tree, but we will be able to recognize one 476 of the following situations, which all require that Q 477 is black. */ 478 q->red = 0; 479 p->red = 1; 480 /* Left rotate p. */ 481 p->right = q->left; 482 q->left = p; 483 *pp = q; 484 /* Make sure pp is right if the case below tries to use 485 it. */ 486 nodestack[sp++] = pp = &q->left; 487 q = p->right; 488 } 489 /* We know that Q can't be NULL here. We also know that Q is 490 black. */ 491 if ((q->left == NULL || !q->left->red) 492 && (q->right == NULL || !q->right->red)) 493 { 494 /* Q has two black successors. We can simply color Q red. 495 The whole subtree with root P is now missing one black 496 edge. Note that this action can temporarily make the 497 tree invalid (if P is red). But we will exit the loop 498 in that case and set P black, which both makes the tree 499 valid and also makes the black edge count come out 500 right. If P is black, we are at least one step closer 501 to the root and we'll try again the next iteration. */ 502 q->red = 1; 503 r = p; 504 } 505 else 506 { 507 /* Q is black, one of Q's successors is red. We can 508 repair the tree with one operation and will exit the 509 loop afterwards. */ 510 if (q->right == NULL || !q->right->red) 511 { 512 /* The left one is red. We perform the same action as 513 in maybe_split_for_insert where two red edges are 514 adjacent but point in different directions: 515 Q's left successor (let's call it Q2) becomes the 516 top of the subtree we are looking at, its parent (Q) 517 and grandparent (P) become its successors. The former 518 successors of Q2 are placed below P and Q. 519 P becomes black, and Q2 gets the color that P had. 520 This changes the black edge count only for node R and 521 its successors. */ 522 node q2 = q->left; 523 q2->red = p->red; 524 p->right = q2->left; 525 q->left = q2->right; 526 q2->right = q; 527 q2->left = p; 528 *pp = q2; 529 p->red = 0; 530 } 531 else 532 { 533 /* It's the right one. Rotate P left. P becomes black, 534 and Q gets the color that P had. Q's right successor 535 also becomes black. This changes the black edge 536 count only for node R and its successors. */ 537 q->red = p->red; 538 p->red = 0; 539 540 q->right->red = 0; 541 542 /* left rotate p */ 543 p->right = q->left; 544 q->left = p; 545 *pp = q; 546 } 547 548 /* We're done. */ 549 sp = 1; 550 r = NULL; 551 } 552 } 553 else 554 { 555 /* Comments: see above. */ 556 q = p->left; 557 if (q->red) 558 { 559 q->red = 0; 560 p->red = 1; 561 p->left = q->right; 562 q->right = p; 563 *pp = q; 564 nodestack[sp++] = pp = &q->right; 565 q = p->left; 566 } 567 if ((q->right == NULL || !q->right->red) 568 && (q->left == NULL || !q->left->red)) 569 { 570 q->red = 1; 571 r = p; 572 } 573 else 574 { 575 if (q->left == NULL || !q->left->red) 576 { 577 node q2 = q->right; 578 q2->red = p->red; 579 p->left = q2->right; 580 q->right = q2->left; 581 q2->left = q; 582 q2->right = p; 583 *pp = q2; 584 p->red = 0; 585 } 586 else 587 { 588 q->red = p->red; 589 p->red = 0; 590 q->left->red = 0; 591 p->left = q->right; 592 q->right = p; 593 *pp = q; 594 } 595 sp = 1; 596 r = NULL; 597 } 598 } 599 --sp; 600 } 601 if (r != NULL) 602 r->red = 0; 603 } 604 605 free (unchained); 606 return retval; 607} 608#ifdef weak_alias 609weak_alias (__tdelete, tdelete) 610#endif 611 612 613/* Walk the nodes of a tree. 614 ROOT is the root of the tree to be walked, ACTION the function to be 615 called at each node. LEVEL is the level of ROOT in the whole tree. */ 616static void 617internal_function 618trecurse (const void *vroot, __action_fn_t action, int level) 619{ 620 const_node root = (const_node) vroot; 621 622 if (root->left == NULL && root->right == NULL) 623 (*action) (root, leaf, level); 624 else 625 { 626 (*action) (root, preorder, level); 627 if (root->left != NULL) 628 trecurse (root->left, action, level + 1); 629 (*action) (root, postorder, level); 630 if (root->right != NULL) 631 trecurse (root->right, action, level + 1); 632 (*action) (root, endorder, level); 633 } 634} 635 636 637/* Walk the nodes of a tree. 638 ROOT is the root of the tree to be walked, ACTION the function to be 639 called at each node. */ 640void 641__twalk (const void *vroot, __action_fn_t action) 642{ 643 const_node root = (const_node) vroot; 644 645 CHECK_TREE (root); 646 647 if (root != NULL && action != NULL) 648 trecurse (root, action, 0); 649} 650#ifdef weak_alias 651weak_alias (__twalk, twalk) 652#endif 653 654 655#ifdef _LIBC 656 657/* The standardized functions miss an important functionality: the 658 tree cannot be removed easily. We provide a function to do this. */ 659static void 660internal_function 661tdestroy_recurse (node root, __free_fn_t freefct) 662{ 663 if (root->left != NULL) 664 tdestroy_recurse (root->left, freefct); 665 if (root->right != NULL) 666 tdestroy_recurse (root->right, freefct); 667 (*freefct) ((void *) root->key); 668 /* Free the node itself. */ 669 free (root); 670} 671 672void 673__tdestroy (void *vroot, __free_fn_t freefct) 674{ 675 node root = (node) vroot; 676 677 CHECK_TREE (root); 678 679 if (root != NULL) 680 tdestroy_recurse (root, freefct); 681} 682weak_alias (__tdestroy, tdestroy) 683 684#endif /* _LIBC */ 685