1/* hash - hashing table processing. 2 3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006 Free 4 Software Foundation, Inc. 5 6 Written by Jim Meyering, 1992. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, write to the Free Software Foundation, 20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 21 22/* A generic hash table package. */ 23 24/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead 25 of malloc. If you change USE_OBSTACK, you have to recompile! */ 26 27#include <config.h> 28 29#include "hash.h" 30#include "xalloc.h" 31 32#include <limits.h> 33#include <stdio.h> 34#include <stdlib.h> 35 36#if USE_OBSTACK 37# include "obstack.h" 38# ifndef obstack_chunk_alloc 39# define obstack_chunk_alloc malloc 40# endif 41# ifndef obstack_chunk_free 42# define obstack_chunk_free free 43# endif 44#endif 45 46#ifndef SIZE_MAX 47# define SIZE_MAX ((size_t) -1) 48#endif 49 50struct hash_table 51 { 52 /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1, 53 for a possibility of N_BUCKETS. Among those, N_BUCKETS_USED buckets 54 are not empty, there are N_ENTRIES active entries in the table. */ 55 struct hash_entry *bucket; 56 struct hash_entry const *bucket_limit; 57 size_t n_buckets; 58 size_t n_buckets_used; 59 size_t n_entries; 60 61 /* Tuning arguments, kept in a physicaly separate structure. 
*/ 62 const Hash_tuning *tuning; 63 64 /* Three functions are given to `hash_initialize', see the documentation 65 block for this function. In a word, HASHER randomizes a user entry 66 into a number up from 0 up to some maximum minus 1; COMPARATOR returns 67 true if two user entries compare equally; and DATA_FREER is the cleanup 68 function for a user entry. */ 69 Hash_hasher hasher; 70 Hash_comparator comparator; 71 Hash_data_freer data_freer; 72 73 /* A linked list of freed struct hash_entry structs. */ 74 struct hash_entry *free_entry_list; 75 76#if USE_OBSTACK 77 /* Whenever obstacks are used, it is possible to allocate all overflowed 78 entries into a single stack, so they all can be freed in a single 79 operation. It is not clear if the speedup is worth the trouble. */ 80 struct obstack entry_stack; 81#endif 82 }; 83 84/* A hash table contains many internal entries, each holding a pointer to 85 some user provided data (also called a user entry). An entry indistinctly 86 refers to both the internal entry and its associated user entry. A user 87 entry contents may be hashed by a randomization function (the hashing 88 function, or just `hasher' for short) into a number (or `slot') between 0 89 and the current table size. At each slot position in the hash table, 90 starts a linked chain of entries for which the user data all hash to this 91 slot. A bucket is the collection of all entries hashing to the same slot. 92 93 A good `hasher' function will distribute entries rather evenly in buckets. 94 In the ideal case, the length of each bucket is roughly the number of 95 entries divided by the table size. Finding the slot for a data is usually 96 done in constant time by the `hasher', and the later finding of a precise 97 entry is linear in time with the size of the bucket. Consequently, a 98 larger hash table size (that is, a larger number of buckets) is prone to 99 yielding shorter chains, *given* the `hasher' function behaves properly. 
100 101 Long buckets slow down the lookup algorithm. One might use big hash table 102 sizes in hope to reduce the average length of buckets, but this might 103 become inordinate, as unused slots in the hash table take some space. The 104 best bet is to make sure you are using a good `hasher' function (beware 105 that those are not that easy to write! :-), and to use a table size 106 larger than the actual number of entries. */ 107 108/* If an insertion makes the ratio of nonempty buckets to table size larger 109 than the growth threshold (a number between 0.0 and 1.0), then increase 110 the table size by multiplying by the growth factor (a number greater than 111 1.0). The growth threshold defaults to 0.8, and the growth factor 112 defaults to 1.414, meaning that the table will have doubled its size 113 every second time 80% of the buckets get used. */ 114#define DEFAULT_GROWTH_THRESHOLD 0.8 115#define DEFAULT_GROWTH_FACTOR 1.414 116 117/* If a deletion empties a bucket and causes the ratio of used buckets to 118 table size to become smaller than the shrink threshold (a number between 119 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a 120 number greater than the shrink threshold but smaller than 1.0). The shrink 121 threshold and factor default to 0.0 and 1.0, meaning that the table never 122 shrinks. */ 123#define DEFAULT_SHRINK_THRESHOLD 0.0 124#define DEFAULT_SHRINK_FACTOR 1.0 125 126/* Use this to initialize or reset a TUNING structure to 127 some sensible values. */ 128static const Hash_tuning default_tuning = 129 { 130 DEFAULT_SHRINK_THRESHOLD, 131 DEFAULT_SHRINK_FACTOR, 132 DEFAULT_GROWTH_THRESHOLD, 133 DEFAULT_GROWTH_FACTOR, 134 false 135 }; 136 137/* Information and lookup. */ 138 139/* The following few functions provide information about the overall hash 140 table organization: the number of entries, number of buckets and maximum 141 length of buckets. */ 142 143/* Return the number of buckets in the hash table. 
The table size, the total 144 number of buckets (used plus unused), or the maximum number of slots, are 145 the same quantity. */ 146 147size_t 148hash_get_n_buckets (const Hash_table *table) 149{ 150 return table->n_buckets; 151} 152 153/* Return the number of slots in use (non-empty buckets). */ 154 155size_t 156hash_get_n_buckets_used (const Hash_table *table) 157{ 158 return table->n_buckets_used; 159} 160 161/* Return the number of active entries. */ 162 163size_t 164hash_get_n_entries (const Hash_table *table) 165{ 166 return table->n_entries; 167} 168 169/* Return the length of the longest chain (bucket). */ 170 171size_t 172hash_get_max_bucket_length (const Hash_table *table) 173{ 174 struct hash_entry const *bucket; 175 size_t max_bucket_length = 0; 176 177 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) 178 { 179 if (bucket->data) 180 { 181 struct hash_entry const *cursor = bucket; 182 size_t bucket_length = 1; 183 184 while (cursor = cursor->next, cursor) 185 bucket_length++; 186 187 if (bucket_length > max_bucket_length) 188 max_bucket_length = bucket_length; 189 } 190 } 191 192 return max_bucket_length; 193} 194 195/* Do a mild validation of a hash table, by traversing it and checking two 196 statistics. */ 197 198bool 199hash_table_ok (const Hash_table *table) 200{ 201 struct hash_entry const *bucket; 202 size_t n_buckets_used = 0; 203 size_t n_entries = 0; 204 205 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) 206 { 207 if (bucket->data) 208 { 209 struct hash_entry const *cursor = bucket; 210 211 /* Count bucket head. */ 212 n_buckets_used++; 213 n_entries++; 214 215 /* Count bucket overflow. 
*/ 216 while (cursor = cursor->next, cursor) 217 n_entries++; 218 } 219 } 220 221 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries) 222 return true; 223 224 return false; 225} 226 227void 228hash_print_statistics (const Hash_table *table, FILE *stream) 229{ 230 size_t n_entries = hash_get_n_entries (table); 231 size_t n_buckets = hash_get_n_buckets (table); 232 size_t n_buckets_used = hash_get_n_buckets_used (table); 233 size_t max_bucket_length = hash_get_max_bucket_length (table); 234 235 fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries); 236 fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets); 237 fprintf (stream, "# buckets used: %lu (%.2f%%)\n", 238 (unsigned long int) n_buckets_used, 239 (100.0 * n_buckets_used) / n_buckets); 240 fprintf (stream, "max bucket length: %lu\n", 241 (unsigned long int) max_bucket_length); 242} 243 244/* If ENTRY matches an entry already in the hash table, return the 245 entry from the table. Otherwise, return NULL. */ 246 247void * 248hash_lookup (const Hash_table *table, const void *entry) 249{ 250 struct hash_entry const *bucket 251 = table->bucket + table->hasher (entry, table->n_buckets); 252 struct hash_entry const *cursor; 253 254 if (! (bucket < table->bucket_limit)) 255 abort (); 256 257 if (bucket->data == NULL) 258 return NULL; 259 260 for (cursor = bucket; cursor; cursor = cursor->next) 261 if (table->comparator (entry, cursor->data)) 262 return cursor->data; 263 264 return NULL; 265} 266 267/* Walking. */ 268 269/* The functions in this page traverse the hash table and process the 270 contained entries. For the traversal to work properly, the hash table 271 should not be resized nor modified while any particular entry is being 272 processed. In particular, entries should not be added or removed. */ 273 274/* Return the first data in the table, or NULL if the table is empty. 
*/ 275 276void * 277hash_get_first (const Hash_table *table) 278{ 279 struct hash_entry const *bucket; 280 281 if (table->n_entries == 0) 282 return NULL; 283 284 for (bucket = table->bucket; ; bucket++) 285 if (! (bucket < table->bucket_limit)) 286 abort (); 287 else if (bucket->data) 288 return bucket->data; 289} 290 291/* Return the user data for the entry following ENTRY, where ENTRY has been 292 returned by a previous call to either `hash_get_first' or `hash_get_next'. 293 Return NULL if there are no more entries. */ 294 295void * 296hash_get_next (const Hash_table *table, const void *entry) 297{ 298 struct hash_entry const *bucket 299 = table->bucket + table->hasher (entry, table->n_buckets); 300 struct hash_entry const *cursor; 301 302 if (! (bucket < table->bucket_limit)) 303 abort (); 304 305 /* Find next entry in the same bucket. */ 306 for (cursor = bucket; cursor; cursor = cursor->next) 307 if (cursor->data == entry && cursor->next) 308 return cursor->next->data; 309 310 /* Find first entry in any subsequent bucket. */ 311 while (++bucket < table->bucket_limit) 312 if (bucket->data) 313 return bucket->data; 314 315 /* None found. */ 316 return NULL; 317} 318 319/* Fill BUFFER with pointers to active user entries in the hash table, then 320 return the number of pointers copied. Do not copy more than BUFFER_SIZE 321 pointers. 
*/ 322 323size_t 324hash_get_entries (const Hash_table *table, void **buffer, 325 size_t buffer_size) 326{ 327 size_t counter = 0; 328 struct hash_entry const *bucket; 329 struct hash_entry const *cursor; 330 331 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) 332 { 333 if (bucket->data) 334 { 335 for (cursor = bucket; cursor; cursor = cursor->next) 336 { 337 if (counter >= buffer_size) 338 return counter; 339 buffer[counter++] = cursor->data; 340 } 341 } 342 } 343 344 return counter; 345} 346 347/* Call a PROCESSOR function for each entry of a hash table, and return the 348 number of entries for which the processor function returned success. A 349 pointer to some PROCESSOR_DATA which will be made available to each call to 350 the processor function. The PROCESSOR accepts two arguments: the first is 351 the user entry being walked into, the second is the value of PROCESSOR_DATA 352 as received. The walking continue for as long as the PROCESSOR function 353 returns nonzero. When it returns zero, the walking is interrupted. */ 354 355size_t 356hash_do_for_each (const Hash_table *table, Hash_processor processor, 357 void *processor_data) 358{ 359 size_t counter = 0; 360 struct hash_entry const *bucket; 361 struct hash_entry const *cursor; 362 363 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) 364 { 365 if (bucket->data) 366 { 367 for (cursor = bucket; cursor; cursor = cursor->next) 368 { 369 if (!(*processor) (cursor->data, processor_data)) 370 return counter; 371 counter++; 372 } 373 } 374 } 375 376 return counter; 377} 378 379/* Allocation and clean-up. */ 380 381/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1. 382 This is a convenience routine for constructing other hashing functions. */ 383 384#if USE_DIFF_HASH 385 386/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see 387 B. J. McKenzie, R. Harries & T. 
Bell, Selecting a hashing algorithm, 388 Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash 389 algorithms tend to be domain-specific, so what's good for [diffutils'] io.c 390 may not be good for your application." */ 391 392size_t 393hash_string (const char *string, size_t n_buckets) 394{ 395# define ROTATE_LEFT(Value, Shift) \ 396 ((Value) << (Shift) | (Value) >> ((sizeof (size_t) * CHAR_BIT) - (Shift))) 397# define HASH_ONE_CHAR(Value, Byte) \ 398 ((Byte) + ROTATE_LEFT (Value, 7)) 399 400 size_t value = 0; 401 unsigned char ch; 402 403 for (; (ch = *string); string++) 404 value = HASH_ONE_CHAR (value, ch); 405 return value % n_buckets; 406 407# undef ROTATE_LEFT 408# undef HASH_ONE_CHAR 409} 410 411#else /* not USE_DIFF_HASH */ 412 413/* This one comes from `recode', and performs a bit better than the above as 414 per a few experiments. It is inspired from a hashing routine found in the 415 very old Cyber `snoop', itself written in typical Greg Mansfield style. 416 (By the way, what happened to this excellent man? Is he still alive?) */ 417 418size_t 419hash_string (const char *string, size_t n_buckets) 420{ 421 size_t value = 0; 422 unsigned char ch; 423 424 for (; (ch = *string); string++) 425 value = (value * 31 + ch) % n_buckets; 426 return value; 427} 428 429#endif /* not USE_DIFF_HASH */ 430 431/* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd 432 number at least equal to 11. */ 433 434static bool 435is_prime (size_t candidate) 436{ 437 size_t divisor = 3; 438 size_t square = divisor * divisor; 439 440 while (square < candidate && (candidate % divisor)) 441 { 442 divisor++; 443 square += 4 * divisor; 444 divisor++; 445 } 446 447 return (candidate % divisor ? true : false); 448} 449 450/* Round a given CANDIDATE number up to the nearest prime, and return that 451 prime. Primes lower than 10 are merely skipped. */ 452 453static size_t 454next_prime (size_t candidate) 455{ 456 /* Skip small primes. 
*/ 457 if (candidate < 10) 458 candidate = 10; 459 460 /* Make it definitely odd. */ 461 candidate |= 1; 462 463 while (!is_prime (candidate)) 464 candidate += 2; 465 466 return candidate; 467} 468 469void 470hash_reset_tuning (Hash_tuning *tuning) 471{ 472 *tuning = default_tuning; 473} 474 475/* For the given hash TABLE, check the user supplied tuning structure for 476 reasonable values, and return true if there is no gross error with it. 477 Otherwise, definitively reset the TUNING field to some acceptable default 478 in the hash table (that is, the user loses the right of further modifying 479 tuning arguments), and return false. */ 480 481static bool 482check_tuning (Hash_table *table) 483{ 484 const Hash_tuning *tuning = table->tuning; 485 486 /* Be a bit stricter than mathematics would require, so that 487 rounding errors in size calculations do not cause allocations to 488 fail to grow or shrink as they should. The smallest allocation 489 is 11 (due to next_prime's algorithm), so an epsilon of 0.1 490 should be good enough. */ 491 float epsilon = 0.1f; 492 493 if (epsilon < tuning->growth_threshold 494 && tuning->growth_threshold < 1 - epsilon 495 && 1 + epsilon < tuning->growth_factor 496 && 0 <= tuning->shrink_threshold 497 && tuning->shrink_threshold + epsilon < tuning->shrink_factor 498 && tuning->shrink_factor <= 1 499 && tuning->shrink_threshold + epsilon < tuning->growth_threshold) 500 return true; 501 502 table->tuning = &default_tuning; 503 return false; 504} 505 506/* Allocate and return a new hash table, or NULL upon failure. The initial 507 number of buckets is automatically selected so as to _guarantee_ that you 508 may insert at least CANDIDATE different user entries before any growth of 509 the hash table size occurs. So, if have a reasonably tight a-priori upper 510 bound on the number of entries you intend to insert in the hash table, you 511 may save some table memory and insertion time, by specifying it here. 
If 512 the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE 513 argument has its meaning changed to the wanted number of buckets. 514 515 TUNING points to a structure of user-supplied values, in case some fine 516 tuning is wanted over the default behavior of the hasher. If TUNING is 517 NULL, the default tuning parameters are used instead. 518 519 The user-supplied HASHER function should be provided. It accepts two 520 arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a 521 slot number for that entry which should be in the range 0..TABLE_SIZE-1. 522 This slot number is then returned. 523 524 The user-supplied COMPARATOR function should be provided. It accepts two 525 arguments pointing to user data, it then returns true for a pair of entries 526 that compare equal, or false otherwise. This function is internally called 527 on entries which are already known to hash to the same bucket index. 528 529 The user-supplied DATA_FREER function, when not NULL, may be later called 530 with the user data as an argument, just before the entry containing the 531 data gets freed. This happens from within `hash_free' or `hash_clear'. 532 You should specify this function only if you want these functions to free 533 all of your `data' data. This is typically the case when your data is 534 simply an auxiliary struct that you have malloc'd to aggregate several 535 values. */ 536 537Hash_table * 538hash_initialize (size_t candidate, const Hash_tuning *tuning, 539 Hash_hasher hasher, Hash_comparator comparator, 540 Hash_data_freer data_freer) 541{ 542 Hash_table *table; 543 544 if (hasher == NULL || comparator == NULL) 545 return NULL; 546 547 table = malloc (sizeof *table); 548 if (table == NULL) 549 return NULL; 550 551 if (!tuning) 552 tuning = &default_tuning; 553 table->tuning = tuning; 554 if (!check_tuning (table)) 555 { 556 /* Fail if the tuning options are invalid. This is the only occasion 557 when the user gets some feedback about it. 
Once the table is created, 558 if the user provides invalid tuning options, we silently revert to 559 using the defaults, and ignore further request to change the tuning 560 options. */ 561 goto fail; 562 } 563 564 if (!tuning->is_n_buckets) 565 { 566 float new_candidate = candidate / tuning->growth_threshold; 567 if (SIZE_MAX <= new_candidate) 568 goto fail; 569 candidate = new_candidate; 570 } 571 572 if (xalloc_oversized (candidate, sizeof *table->bucket)) 573 goto fail; 574 table->n_buckets = next_prime (candidate); 575 if (xalloc_oversized (table->n_buckets, sizeof *table->bucket)) 576 goto fail; 577 578 table->bucket = calloc (table->n_buckets, sizeof *table->bucket); 579 table->bucket_limit = table->bucket + table->n_buckets; 580 table->n_buckets_used = 0; 581 table->n_entries = 0; 582 583 table->hasher = hasher; 584 table->comparator = comparator; 585 table->data_freer = data_freer; 586 587 table->free_entry_list = NULL; 588#if USE_OBSTACK 589 obstack_init (&table->entry_stack); 590#endif 591 return table; 592 593 fail: 594 free (table); 595 return NULL; 596} 597 598/* Make all buckets empty, placing any chained entries on the free list. 599 Apply the user-specified function data_freer (if any) to the datas of any 600 affected entries. */ 601 602void 603hash_clear (Hash_table *table) 604{ 605 struct hash_entry *bucket; 606 607 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) 608 { 609 if (bucket->data) 610 { 611 struct hash_entry *cursor; 612 struct hash_entry *next; 613 614 /* Free the bucket overflow. */ 615 for (cursor = bucket->next; cursor; cursor = next) 616 { 617 if (table->data_freer) 618 (*table->data_freer) (cursor->data); 619 cursor->data = NULL; 620 621 next = cursor->next; 622 /* Relinking is done one entry at a time, as it is to be expected 623 that overflows are either rare or short. */ 624 cursor->next = table->free_entry_list; 625 table->free_entry_list = cursor; 626 } 627 628 /* Free the bucket head. 
*/ 629 if (table->data_freer) 630 (*table->data_freer) (bucket->data); 631 bucket->data = NULL; 632 bucket->next = NULL; 633 } 634 } 635 636 table->n_buckets_used = 0; 637 table->n_entries = 0; 638} 639 640/* Reclaim all storage associated with a hash table. If a data_freer 641 function has been supplied by the user when the hash table was created, 642 this function applies it to the data of each entry before freeing that 643 entry. */ 644 645void 646hash_free (Hash_table *table) 647{ 648 struct hash_entry *bucket; 649 struct hash_entry *cursor; 650 struct hash_entry *next; 651 652 /* Call the user data_freer function. */ 653 if (table->data_freer && table->n_entries) 654 { 655 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) 656 { 657 if (bucket->data) 658 { 659 for (cursor = bucket; cursor; cursor = cursor->next) 660 { 661 (*table->data_freer) (cursor->data); 662 } 663 } 664 } 665 } 666 667#if USE_OBSTACK 668 669 obstack_free (&table->entry_stack, NULL); 670 671#else 672 673 /* Free all bucket overflowed entries. */ 674 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) 675 { 676 for (cursor = bucket->next; cursor; cursor = next) 677 { 678 next = cursor->next; 679 free (cursor); 680 } 681 } 682 683 /* Also reclaim the internal list of previously freed entries. */ 684 for (cursor = table->free_entry_list; cursor; cursor = next) 685 { 686 next = cursor->next; 687 free (cursor); 688 } 689 690#endif 691 692 /* Free the remainder of the hash table structure. */ 693 free (table->bucket); 694 free (table); 695} 696 697/* Insertion and deletion. */ 698 699/* Get a new hash entry for a bucket overflow, possibly by reclying a 700 previously freed one. If this is not possible, allocate a new one. 
*/ 701 702static struct hash_entry * 703allocate_entry (Hash_table *table) 704{ 705 struct hash_entry *new; 706 707 if (table->free_entry_list) 708 { 709 new = table->free_entry_list; 710 table->free_entry_list = new->next; 711 } 712 else 713 { 714#if USE_OBSTACK 715 new = obstack_alloc (&table->entry_stack, sizeof *new); 716#else 717 new = malloc (sizeof *new); 718#endif 719 } 720 721 return new; 722} 723 724/* Free a hash entry which was part of some bucket overflow, 725 saving it for later recycling. */ 726 727static void 728free_entry (Hash_table *table, struct hash_entry *entry) 729{ 730 entry->data = NULL; 731 entry->next = table->free_entry_list; 732 table->free_entry_list = entry; 733} 734 735/* This private function is used to help with insertion and deletion. When 736 ENTRY matches an entry in the table, return a pointer to the corresponding 737 user data and set *BUCKET_HEAD to the head of the selected bucket. 738 Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in 739 the table, unlink the matching entry. */ 740 741static void * 742hash_find_entry (Hash_table *table, const void *entry, 743 struct hash_entry **bucket_head, bool delete) 744{ 745 struct hash_entry *bucket 746 = table->bucket + table->hasher (entry, table->n_buckets); 747 struct hash_entry *cursor; 748 749 if (! (bucket < table->bucket_limit)) 750 abort (); 751 752 *bucket_head = bucket; 753 754 /* Test for empty bucket. */ 755 if (bucket->data == NULL) 756 return NULL; 757 758 /* See if the entry is the first in the bucket. */ 759 if ((*table->comparator) (entry, bucket->data)) 760 { 761 void *data = bucket->data; 762 763 if (delete) 764 { 765 if (bucket->next) 766 { 767 struct hash_entry *next = bucket->next; 768 769 /* Bump the first overflow entry into the bucket head, then save 770 the previous first overflow entry for later recycling. 
*/ 771 *bucket = *next; 772 free_entry (table, next); 773 } 774 else 775 { 776 bucket->data = NULL; 777 } 778 } 779 780 return data; 781 } 782 783 /* Scan the bucket overflow. */ 784 for (cursor = bucket; cursor->next; cursor = cursor->next) 785 { 786 if ((*table->comparator) (entry, cursor->next->data)) 787 { 788 void *data = cursor->next->data; 789 790 if (delete) 791 { 792 struct hash_entry *next = cursor->next; 793 794 /* Unlink the entry to delete, then save the freed entry for later 795 recycling. */ 796 cursor->next = next->next; 797 free_entry (table, next); 798 } 799 800 return data; 801 } 802 } 803 804 /* No entry found. */ 805 return NULL; 806} 807 808/* For an already existing hash table, change the number of buckets through 809 specifying CANDIDATE. The contents of the hash table are preserved. The 810 new number of buckets is automatically selected so as to _guarantee_ that 811 the table may receive at least CANDIDATE different user entries, including 812 those already in the table, before any other growth of the hash table size 813 occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the 814 exact number of buckets desired. */ 815 816bool 817hash_rehash (Hash_table *table, size_t candidate) 818{ 819 Hash_table *new_table; 820 struct hash_entry *bucket; 821 struct hash_entry *cursor; 822 struct hash_entry *next; 823 824 new_table = hash_initialize (candidate, table->tuning, table->hasher, 825 table->comparator, table->data_freer); 826 if (new_table == NULL) 827 return false; 828 829 /* Merely reuse the extra old space into the new table. 
*/ 830#if USE_OBSTACK 831 obstack_free (&new_table->entry_stack, NULL); 832 new_table->entry_stack = table->entry_stack; 833#endif 834 new_table->free_entry_list = table->free_entry_list; 835 836 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) 837 if (bucket->data) 838 for (cursor = bucket; cursor; cursor = next) 839 { 840 void *data = cursor->data; 841 struct hash_entry *new_bucket 842 = (new_table->bucket 843 + new_table->hasher (data, new_table->n_buckets)); 844 845 if (! (new_bucket < new_table->bucket_limit)) 846 abort (); 847 848 next = cursor->next; 849 850 if (new_bucket->data) 851 { 852 if (cursor == bucket) 853 { 854 /* Allocate or recycle an entry, when moving from a bucket 855 header into a bucket overflow. */ 856 struct hash_entry *new_entry = allocate_entry (new_table); 857 858 if (new_entry == NULL) 859 return false; 860 861 new_entry->data = data; 862 new_entry->next = new_bucket->next; 863 new_bucket->next = new_entry; 864 } 865 else 866 { 867 /* Merely relink an existing entry, when moving from a 868 bucket overflow into a bucket overflow. */ 869 cursor->next = new_bucket->next; 870 new_bucket->next = cursor; 871 } 872 } 873 else 874 { 875 /* Free an existing entry, when moving from a bucket 876 overflow into a bucket header. Also take care of the 877 simple case of moving from a bucket header into a bucket 878 header. */ 879 new_bucket->data = data; 880 new_table->n_buckets_used++; 881 if (cursor != bucket) 882 free_entry (new_table, cursor); 883 } 884 } 885 886 free (table->bucket); 887 table->bucket = new_table->bucket; 888 table->bucket_limit = new_table->bucket_limit; 889 table->n_buckets = new_table->n_buckets; 890 table->n_buckets_used = new_table->n_buckets_used; 891 table->free_entry_list = new_table->free_entry_list; 892 /* table->n_entries already holds its value. 
*/ 893#if USE_OBSTACK 894 table->entry_stack = new_table->entry_stack; 895#endif 896 free (new_table); 897 898 return true; 899} 900 901/* If ENTRY matches an entry already in the hash table, return the pointer 902 to the entry from the table. Otherwise, insert ENTRY and return ENTRY. 903 Return NULL if the storage required for insertion cannot be allocated. */ 904 905void * 906hash_insert (Hash_table *table, const void *entry) 907{ 908 void *data; 909 struct hash_entry *bucket; 910 911 /* The caller cannot insert a NULL entry. */ 912 if (! entry) 913 abort (); 914 915 /* If there's a matching entry already in the table, return that. */ 916 if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL) 917 return data; 918 919 /* ENTRY is not matched, it should be inserted. */ 920 921 if (bucket->data) 922 { 923 struct hash_entry *new_entry = allocate_entry (table); 924 925 if (new_entry == NULL) 926 return NULL; 927 928 /* Add ENTRY in the overflow of the bucket. */ 929 930 new_entry->data = (void *) entry; 931 new_entry->next = bucket->next; 932 bucket->next = new_entry; 933 table->n_entries++; 934 return (void *) entry; 935 } 936 937 /* Add ENTRY right in the bucket head. */ 938 939 bucket->data = (void *) entry; 940 table->n_entries++; 941 table->n_buckets_used++; 942 943 /* If the growth threshold of the buckets in use has been reached, increase 944 the table size and rehash. There's no point in checking the number of 945 entries: if the hashing function is ill-conditioned, rehashing is not 946 likely to improve it. */ 947 948 if (table->n_buckets_used 949 > table->tuning->growth_threshold * table->n_buckets) 950 { 951 /* Check more fully, before starting real work. If tuning arguments 952 became invalid, the second check will rely on proper defaults. 
*/ 953 check_tuning (table); 954 if (table->n_buckets_used 955 > table->tuning->growth_threshold * table->n_buckets) 956 { 957 const Hash_tuning *tuning = table->tuning; 958 float candidate = 959 (tuning->is_n_buckets 960 ? (table->n_buckets * tuning->growth_factor) 961 : (table->n_buckets * tuning->growth_factor 962 * tuning->growth_threshold)); 963 964 if (SIZE_MAX <= candidate) 965 return NULL; 966 967 /* If the rehash fails, arrange to return NULL. */ 968 if (!hash_rehash (table, candidate)) 969 entry = NULL; 970 } 971 } 972 973 return (void *) entry; 974} 975 976/* If ENTRY is already in the table, remove it and return the just-deleted 977 data (the user may want to deallocate its storage). If ENTRY is not in the 978 table, don't modify the table and return NULL. */ 979 980void * 981hash_delete (Hash_table *table, const void *entry) 982{ 983 void *data; 984 struct hash_entry *bucket; 985 986 data = hash_find_entry (table, entry, &bucket, true); 987 if (!data) 988 return NULL; 989 990 table->n_entries--; 991 if (!bucket->data) 992 { 993 table->n_buckets_used--; 994 995 /* If the shrink threshold of the buckets in use has been reached, 996 rehash into a smaller table. */ 997 998 if (table->n_buckets_used 999 < table->tuning->shrink_threshold * table->n_buckets) 1000 { 1001 /* Check more fully, before starting real work. If tuning arguments 1002 became invalid, the second check will rely on proper defaults. */ 1003 check_tuning (table); 1004 if (table->n_buckets_used 1005 < table->tuning->shrink_threshold * table->n_buckets) 1006 { 1007 const Hash_tuning *tuning = table->tuning; 1008 size_t candidate = 1009 (tuning->is_n_buckets 1010 ? table->n_buckets * tuning->shrink_factor 1011 : (table->n_buckets * tuning->shrink_factor 1012 * tuning->growth_threshold)); 1013 1014 hash_rehash (table, candidate); 1015 } 1016 } 1017 } 1018 1019 return data; 1020} 1021 1022/* Testing. 
*/

#if TESTING

/* Dump TABLE to standard output for debugging.  The index of every
   bucket is printed, empty buckets included, followed by the data of
   each entry found in that bucket.  */

void
hash_print (const Hash_table *table)
{
  struct hash_entry const *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      /* The table is only read here, so walk it through a pointer to
         const; this also avoids discarding the qualifier of BUCKET.  */
      struct hash_entry const *cursor;

      printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          /* FIXME: this assumes that the user data is a NUL-terminated
             string.  */
          char const *s = cursor->data;

          if (s)
            printf (" %s\n", s);
        }
    }
}

#endif /* TESTING */