1/* Hash tables. 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 3 2009 Free Software Foundation, Inc. 4 5This file is part of GNU Wget. 6 7GNU Wget is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 3 of the License, or (at 10your option) any later version. 11 12GNU Wget is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with Wget. If not, see <http://www.gnu.org/licenses/>. 19 20Additional permission under GNU GPL version 3 section 7 21 22If you modify this program, or any covered work, by linking or 23combining it with the OpenSSL project's OpenSSL library (or a 24modified version of that library), containing parts covered by the 25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation 26grants you additional permission to convey the resulting work. 27Corresponding Source for a non-source form of such a combination 28shall include the source code for the parts of OpenSSL used as well 29as that of the covered work. */ 30 31/* With -DSTANDALONE, this file can be compiled outside Wget source 32 tree. To test, also use -DTEST. */ 33 34#ifndef STANDALONE 35# include "wget.h" 36#endif 37 38#include <stdio.h> 39#include <stdlib.h> 40#include <assert.h> 41#include <string.h> 42#include <limits.h> 43 44#ifndef STANDALONE 45/* Get Wget's utility headers. */ 46# include "utils.h" 47#else 48/* Make do without them. */ 49# define xnew(x) xmalloc (sizeof (x)) 50# define xnew_array(type, x) xmalloc (sizeof (type) * (x)) 51# define xmalloc malloc 52# define xfree free 53# ifndef countof 54# define countof(x) (sizeof (x) / sizeof ((x)[0])) 55# endif 56# include <ctype.h> 57# define c_tolower(x) tolower ((unsigned char) (x)) 58# ifdef HAVE_STDINT_H 59# include <stdint.h> 60# else 61 typedef unsigned long uintptr_t; 62# endif 63#endif 64 65#include "hash.h" 66 67/* INTERFACE: 68 69 Hash tables are a technique used to implement mapping between 70 objects with near-constant-time access and storage. The table 71 associates keys to values, and a value can be very quickly 72 retrieved by providing the key. Fast lookup tables are typically 73 implemented as hash tables. 74 75 The entry points are 76 hash_table_new -- creates the table. 77 hash_table_destroy -- destroys the table. 78 hash_table_put -- establishes or updates key->value mapping. 79 hash_table_get -- retrieves value of key. 80 hash_table_get_pair -- get key/value pair for key. 81 hash_table_contains -- test whether the table contains key. 82 hash_table_remove -- remove key->value mapping for given key. 83 hash_table_for_each -- call function for each table entry. 84 hash_table_iterate -- iterate over entries in hash table. 85 hash_table_iter_next -- return next element during iteration. 86 hash_table_clear -- clear hash table contents. 87 hash_table_count -- return the number of entries in the table. 88 89 The hash table grows internally as new entries are added and is not 90 limited in size, except by available memory. The table doubles 91 with each resize, which ensures that the amortized time per 92 operation remains constant. 93 94 If not instructed otherwise, tables created by hash_table_new 95 consider the keys to be equal if their pointer values are the same. 96 You can use make_string_hash_table to create tables whose keys are 97 considered equal if their string contents are the same. In the 98 general case, the criterion of equality used to compare keys is 99 specified at table creation time with two callback functions, 100 "hash" and "test". The hash function transforms the key into an 101 arbitrary number that must be the same for two equal keys. The 102 test function accepts two keys and returns non-zero if they are to 103 be considered equal. 104 105 Note that neither keys nor values are copied when inserted into the 106 hash table, so they must exist for the lifetime of the table. This 107 means that e.g. the use of static strings is OK, but objects with a 108 shorter life-time probably need to be copied (with strdup() or the 109 like in the case of strings) before being inserted. */ 110 111/* IMPLEMENTATION: 112 113 The hash table is implemented as an open-addressed table with 114 linear probing collision resolution. 115 116 The above means that all the cells (each cell containing a key and 117 a value pointer) are stored in a contiguous array. Array position 118 of each cell is determined by the hash value of its key and the 119 size of the table: location := hash(key) % size. If two different 120 keys end up on the same position (collide), the one that came 121 second is stored in the first unoccupied cell that follows it. 122 This collision resolution technique is called "linear probing". 123 124 There are more advanced collision resolution methods (quadratic 125 probing, double hashing), but we don't use them because they incur 126 more non-sequential access to the array, which results in worse CPU 127 cache behavior. Linear probing works well as long as the 128 count/size ratio (fullness) is kept below 75%. We make sure to 129 grow and rehash the table whenever this threshold is exceeded. 130 131 Collisions complicate deletion because simply clearing a cell 132 followed by previously collided entries would cause those neighbors 133 to not be picked up by find_cell later. One solution is to leave a 134 "tombstone" marker instead of clearing the cell, and another is to 135 recalculate the positions of adjacent cells. We take the latter 136 approach because it results in less bookkeeping garbage and faster 137 retrieval at the (slight) expense of deletion. */ 138 139/* Maximum allowed fullness: when hash table's fullness exceeds this 140 value, the table is resized. */ 141#define HASH_MAX_FULLNESS 0.75 142 143/* The hash table size is multiplied by this factor (and then rounded 144 to the next prime) with each resize. This guarantees infrequent 145 resizes. */ 146#define HASH_RESIZE_FACTOR 2 147 148struct cell { 149 void *key; 150 void *value; 151}; 152 153typedef unsigned long (*hashfun_t) (const void *); 154typedef int (*testfun_t) (const void *, const void *); 155 156struct hash_table { 157 hashfun_t hash_function; 158 testfun_t test_function; 159 160 struct cell *cells; /* contiguous array of cells. */ 161 int size; /* size of the array. */ 162 163 int count; /* number of occupied entries. */ 164 int resize_threshold; /* after size exceeds this number of 165 entries, resize the table. */ 166 int prime_offset; /* the offset of the current prime in 167 the prime table. */ 168}; 169 170/* We use the all-bits-set constant (INVALID_PTR) marker to mean that 171 a cell is empty. It is unaligned and therefore illegal as a 172 pointer. INVALID_PTR_CHAR (0xff) is the single-character constant 173 used to initialize the entire cells array as empty. 174 175 The all-bits-set value is a better choice than NULL because it 176 allows the use of NULL/0 keys. Since the keys are either integers 177 or pointers, the only key that cannot be used is the integer value 178 -1. This is acceptable because it still allows the use of 179 nonnegative integer keys. */ 180 181#define INVALID_PTR ((void *) ~(uintptr_t) 0) 182#ifndef UCHAR_MAX 183# define UCHAR_MAX 0xff 184#endif 185#define INVALID_PTR_CHAR UCHAR_MAX 186 187/* Whether the cell C is occupied (non-empty). */ 188#define CELL_OCCUPIED(c) ((c)->key != INVALID_PTR) 189 190/* Clear the cell C, i.e. mark it as empty (unoccupied). */ 191#define CLEAR_CELL(c) ((c)->key = INVALID_PTR) 192 193/* "Next" cell is the cell following C, but wrapping back to CELLS 194 when C would reach CELLS+SIZE. */ 195#define NEXT_CELL(c, cells, size) (c != cells + (size - 1) ? c + 1 : cells) 196 197/* Loop over occupied cells starting at C, terminating the loop when 198 an empty cell is encountered. */ 199#define FOREACH_OCCUPIED_ADJACENT(c, cells, size) \ 200 for (; CELL_OCCUPIED (c); c = NEXT_CELL (c, cells, size)) 201 202/* Return the position of KEY in hash table SIZE large, hash function 203 being HASHFUN. */ 204#define HASH_POSITION(key, hashfun, size) ((hashfun) (key) % size) 205 206/* Find a prime near, but greather than or equal to SIZE. The primes 207 are looked up from a table with a selection of primes convenient 208 for this purpose. 209 210 PRIME_OFFSET is a minor optimization: it specifies start position 211 for the search for the large enough prime. The final offset is 212 stored in the same variable. That way the list of primes does not 213 have to be scanned from the beginning each time around. */ 214 215static int 216prime_size (int size, int *prime_offset) 217{ 218 static const int primes[] = { 219 13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031, 220 1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783, 221 19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941, 222 204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519, 223 1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301, 224 10445899, 13579681, 17653589, 22949669, 29834603, 38784989, 225 50420551, 65546729, 85210757, 110774011, 144006217, 187208107, 226 243370577, 316381771, 411296309, 534685237, 695090819, 903618083, 227 1174703521, 1527114613, 1837299131, 2147483647 228 }; 229 size_t i; 230 231 for (i = *prime_offset; i < countof (primes); i++) 232 if (primes[i] >= size) 233 { 234 /* Set the offset to the next prime. That is safe because, 235 next time we are called, it will be with a larger SIZE, 236 which means we could never return the same prime anyway. 237 (If that is not the case, the caller can simply reset 238 *prime_offset.) */ 239 *prime_offset = i + 1; 240 return primes[i]; 241 } 242 243 abort (); 244} 245 246static int cmp_pointer (const void *, const void *); 247 248/* Create a hash table with hash function HASH_FUNCTION and test 249 function TEST_FUNCTION. The table is empty (its count is 0), but 250 pre-allocated to store at least ITEMS items. 251 252 ITEMS is the number of items that the table can accept without 253 needing to resize. It is useful when creating a table that is to 254 be immediately filled with a known number of items. In that case, 255 the regrows are a waste of time, and specifying ITEMS correctly 256 will avoid them altogether. 257 258 Note that hash tables grow dynamically regardless of ITEMS. The 259 only use of ITEMS is to preallocate the table and avoid unnecessary 260 dynamic regrows. Don't bother making ITEMS prime because it's not 261 used as size unchanged. To start with a small table that grows as 262 needed, simply specify zero ITEMS. 263 264 If hash and test callbacks are not specified, identity mapping is 265 assumed, i.e. pointer values are used for key comparison. (Common 266 Lisp calls such tables EQ hash tables, and Java calls them 267 IdentityHashMaps.) If your keys require different comparison, 268 specify hash and test functions. For easy use of C strings as hash 269 keys, you can use the convenience functions make_string_hash_table 270 and make_nocase_string_hash_table. */ 271 272struct hash_table * 273hash_table_new (int items, 274 unsigned long (*hash_function) (const void *), 275 int (*test_function) (const void *, const void *)) 276{ 277 int size; 278 struct hash_table *ht = xnew (struct hash_table); 279 280 ht->hash_function = hash_function ? hash_function : hash_pointer; 281 ht->test_function = test_function ? test_function : cmp_pointer; 282 283 /* If the size of struct hash_table ever becomes a concern, this 284 field can go. (Wget doesn't create many hashes.) */ 285 ht->prime_offset = 0; 286 287 /* Calculate the size that ensures that the table will store at 288 least ITEMS keys without the need to resize. */ 289 size = 1 + items / HASH_MAX_FULLNESS; 290 size = prime_size (size, &ht->prime_offset); 291 ht->size = size; 292 ht->resize_threshold = size * HASH_MAX_FULLNESS; 293 /*assert (ht->resize_threshold >= items);*/ 294 295 ht->cells = xnew_array (struct cell, ht->size); 296 297 /* Mark cells as empty. We use 0xff rather than 0 to mark empty 298 keys because it allows us to use NULL/0 as keys. */ 299 memset (ht->cells, INVALID_PTR_CHAR, size * sizeof (struct cell)); 300 301 ht->count = 0; 302 303 return ht; 304} 305 306/* Free the data associated with hash table HT. */ 307 308void 309hash_table_destroy (struct hash_table *ht) 310{ 311 xfree (ht->cells); 312 xfree (ht); 313} 314 315/* The heart of most functions in this file -- find the cell whose 316 KEY is equal to key, using linear probing. Returns the cell 317 that matches KEY, or the first empty cell if none matches. */ 318 319static inline struct cell * 320find_cell (const struct hash_table *ht, const void *key) 321{ 322 struct cell *cells = ht->cells; 323 int size = ht->size; 324 struct cell *c = cells + HASH_POSITION (key, ht->hash_function, size); 325 testfun_t equals = ht->test_function; 326 327 FOREACH_OCCUPIED_ADJACENT (c, cells, size) 328 if (equals (key, c->key)) 329 break; 330 return c; 331} 332 333/* Get the value that corresponds to the key KEY in the hash table HT. 334 If no value is found, return NULL. Note that NULL is a legal value 335 for value; if you are storing NULLs in your hash table, you can use 336 hash_table_contains to be sure that a (possibly NULL) value exists 337 in the table. Or, you can use hash_table_get_pair instead of this 338 function. */ 339 340void * 341hash_table_get (const struct hash_table *ht, const void *key) 342{ 343 struct cell *c = find_cell (ht, key); 344 if (CELL_OCCUPIED (c)) 345 return c->value; 346 else 347 return NULL; 348} 349 350/* Like hash_table_get, but writes out the pointers to both key and 351 value. Returns non-zero on success. */ 352 353int 354hash_table_get_pair (const struct hash_table *ht, const void *lookup_key, 355 void *orig_key, void *value) 356{ 357 struct cell *c = find_cell (ht, lookup_key); 358 if (CELL_OCCUPIED (c)) 359 { 360 if (orig_key) 361 *(void **)orig_key = c->key; 362 if (value) 363 *(void **)value = c->value; 364 return 1; 365 } 366 else 367 return 0; 368} 369 370/* Return 1 if HT contains KEY, 0 otherwise. */ 371 372int 373hash_table_contains (const struct hash_table *ht, const void *key) 374{ 375 struct cell *c = find_cell (ht, key); 376 return CELL_OCCUPIED (c); 377} 378 379/* Grow hash table HT as necessary, and rehash all the key-value 380 mappings. */ 381 382static void 383grow_hash_table (struct hash_table *ht) 384{ 385 hashfun_t hasher = ht->hash_function; 386 struct cell *old_cells = ht->cells; 387 struct cell *old_end = ht->cells + ht->size; 388 struct cell *c, *cells; 389 int newsize; 390 391 newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset); 392#if 0 393 printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n", 394 ht->size, newsize, 395 100.0 * ht->count / ht->size, 396 100.0 * ht->count / newsize); 397#endif 398 399 ht->size = newsize; 400 ht->resize_threshold = newsize * HASH_MAX_FULLNESS; 401 402 cells = xnew_array (struct cell, newsize); 403 memset (cells, INVALID_PTR_CHAR, newsize * sizeof (struct cell)); 404 ht->cells = cells; 405 406 for (c = old_cells; c < old_end; c++) 407 if (CELL_OCCUPIED (c)) 408 { 409 struct cell *new_c; 410 /* We don't need to test for uniqueness of keys because they 411 come from the hash table and are therefore known to be 412 unique. */ 413 new_c = cells + HASH_POSITION (c->key, hasher, newsize); 414 FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize) 415 ; 416 *new_c = *c; 417 } 418 419 xfree (old_cells); 420} 421 422/* Put VALUE in the hash table HT under the key KEY. This regrows the 423 table if necessary. */ 424 425void 426hash_table_put (struct hash_table *ht, const void *key, void *value) 427{ 428 struct cell *c = find_cell (ht, key); 429 if (CELL_OCCUPIED (c)) 430 { 431 /* update existing item */ 432 c->key = (void *)key; /* const? */ 433 c->value = value; 434 return; 435 } 436 437 /* If adding the item would make the table exceed max. fullness, 438 grow the table first. */ 439 if (ht->count >= ht->resize_threshold) 440 { 441 grow_hash_table (ht); 442 c = find_cell (ht, key); 443 } 444 445 /* add new item */ 446 ++ht->count; 447 c->key = (void *)key; /* const? */ 448 c->value = value; 449} 450 451/* Remove KEY->value mapping from HT. Return 0 if there was no such 452 entry; return 1 if an entry was removed. */ 453 454int 455hash_table_remove (struct hash_table *ht, const void *key) 456{ 457 struct cell *c = find_cell (ht, key); 458 if (!CELL_OCCUPIED (c)) 459 return 0; 460 else 461 { 462 int size = ht->size; 463 struct cell *cells = ht->cells; 464 hashfun_t hasher = ht->hash_function; 465 466 CLEAR_CELL (c); 467 --ht->count; 468 469 /* Rehash all the entries following C. The alternative 470 approach is to mark the entry as deleted, i.e. create a 471 "tombstone". That speeds up removal, but leaves a lot of 472 garbage and slows down hash_table_get and hash_table_put. */ 473 474 c = NEXT_CELL (c, cells, size); 475 FOREACH_OCCUPIED_ADJACENT (c, cells, size) 476 { 477 const void *key2 = c->key; 478 struct cell *c_new; 479 480 /* Find the new location for the key. */ 481 c_new = cells + HASH_POSITION (key2, hasher, size); 482 FOREACH_OCCUPIED_ADJACENT (c_new, cells, size) 483 if (key2 == c_new->key) 484 /* The cell C (key2) is already where we want it (in 485 C_NEW's "chain" of keys.) */ 486 goto next_rehash; 487 488 *c_new = *c; 489 CLEAR_CELL (c); 490 491 next_rehash: 492 ; 493 } 494 return 1; 495 } 496} 497 498/* Clear HT of all entries. After calling this function, the count 499 and the fullness of the hash table will be zero. The size will 500 remain unchanged. */ 501 502void 503hash_table_clear (struct hash_table *ht) 504{ 505 memset (ht->cells, INVALID_PTR_CHAR, ht->size * sizeof (struct cell)); 506 ht->count = 0; 507} 508 509/* Call FN for each entry in HT. FN is called with three arguments: 510 the key, the value, and ARG. When FN returns a non-zero value, the 511 mapping stops. 512 513 It is undefined what happens if you add or remove entries in the 514 hash table while hash_table_for_each is running. The exception is 515 the entry you're currently mapping over; you may call 516 hash_table_put or hash_table_remove on that entry's key. That is 517 also the reason why this function cannot be implemented in terms of 518 hash_table_iterate. */ 519 520void 521hash_table_for_each (struct hash_table *ht, 522 int (*fn) (void *, void *, void *), void *arg) 523{ 524 struct cell *c = ht->cells; 525 struct cell *end = ht->cells + ht->size; 526 527 for (; c < end; c++) 528 if (CELL_OCCUPIED (c)) 529 { 530 void *key; 531 repeat: 532 key = c->key; 533 if (fn (key, c->value, arg)) 534 return; 535 /* hash_table_remove might have moved the adjacent cells. */ 536 if (c->key != key && CELL_OCCUPIED (c)) 537 goto repeat; 538 } 539} 540 541/* Initiate iteration over HT. Entries are obtained with 542 hash_table_iter_next, a typical iteration loop looking like this: 543 544 hash_table_iterator iter; 545 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) 546 ... do something with iter.key and iter.value ... 547 548 The iterator does not need to be deallocated after use. The hash 549 table must not be modified while being iterated over. */ 550 551void 552hash_table_iterate (struct hash_table *ht, hash_table_iterator *iter) 553{ 554 iter->pos = ht->cells; 555 iter->end = ht->cells + ht->size; 556} 557 558/* Get the next hash table entry. ITER is an iterator object 559 initialized using hash_table_iterate. While there are more 560 entries, the key and value pointers are stored to ITER->key and 561 ITER->value respectively and 1 is returned. When there are no more 562 entries, 0 is returned. 563 564 If the hash table is modified between calls to this function, the 565 result is undefined. */ 566 567int 568hash_table_iter_next (hash_table_iterator *iter) 569{ 570 struct cell *c = iter->pos; 571 struct cell *end = iter->end; 572 for (; c < end; c++) 573 if (CELL_OCCUPIED (c)) 574 { 575 iter->key = c->key; 576 iter->value = c->value; 577 iter->pos = c + 1; 578 return 1; 579 } 580 return 0; 581} 582 583/* Return the number of elements in the hash table. This is not the 584 same as the physical size of the hash table, which is always 585 greater than the number of elements. */ 586 587int 588hash_table_count (const struct hash_table *ht) 589{ 590 return ht->count; 591} 592 593/* Functions from this point onward are meant for convenience and 594 don't strictly belong to this file. However, this is as good a 595 place for them as any. */ 596 597/* Guidelines for creating custom hash and test functions: 598 599 - The test function returns non-zero for keys that are considered 600 "equal", zero otherwise. 601 602 - The hash function returns a number that represents the 603 "distinctness" of the object. In more precise terms, it means 604 that for any two objects that test "equal" under the test 605 function, the hash function MUST produce the same result. 606 607 This does not mean that all different objects must produce 608 different values (that would be "perfect" hashing), only that 609 non-distinct objects must produce the same values! For instance, 610 a hash function that returns 0 for any given object is a 611 perfectly valid (albeit extremely bad) hash function. A hash 612 function that hashes a string by adding up all its characters is 613 another example of a valid (but still quite bad) hash function. 614 615 It is not hard to make hash and test functions agree about 616 equality. For example, if the test function compares strings 617 case-insensitively, the hash function can lower-case the 618 characters when calculating the hash value. That ensures that 619 two strings differing only in case will hash the same. 620 621 - To prevent performance degradation, choose a hash function with 622 as good "spreading" as possible. A good hash function will use 623 all the bits of the input when calculating the hash, and will 624 react to even small changes in input with a completely different 625 output. But don't make the hash function itself overly slow, 626 because you'll be incurring a non-negligible overhead to all hash 627 table operations. */ 628 629/* 630 * Support for hash tables whose keys are strings. 631 * 632 */ 633 634/* Base 31 hash function. Taken from Gnome's glib, modified to use 635 standard C types. 636 637 We used to use the popular hash function from the Dragon Book, but 638 this one seems to perform much better, both by being faster and by 639 generating less collisions. */ 640 641static unsigned long 642hash_string (const void *key) 643{ 644 const char *p = key; 645 unsigned int h = *p; 646 647 if (h) 648 for (p += 1; *p != '\0'; p++) 649 h = (h << 5) - h + *p; 650 651 return h; 652} 653 654/* Frontend for strcmp usable for hash tables. */ 655 656static int 657cmp_string (const void *s1, const void *s2) 658{ 659 return !strcmp ((const char *)s1, (const char *)s2); 660} 661 662/* Return a hash table of preallocated to store at least ITEMS items 663 suitable to use strings as keys. */ 664 665struct hash_table * 666make_string_hash_table (int items) 667{ 668 return hash_table_new (items, hash_string, cmp_string); 669} 670 671/* 672 * Support for hash tables whose keys are strings, but which are 673 * compared case-insensitively. 674 * 675 */ 676 677/* Like hash_string, but produce the same hash regardless of the case. */ 678 679static unsigned long 680hash_string_nocase (const void *key) 681{ 682 const char *p = key; 683 unsigned int h = c_tolower (*p); 684 685 if (h) 686 for (p += 1; *p != '\0'; p++) 687 h = (h << 5) - h + c_tolower (*p); 688 689 return h; 690} 691 692/* Like string_cmp, but doing case-insensitive compareison. */ 693 694static int 695string_cmp_nocase (const void *s1, const void *s2) 696{ 697 return !strcasecmp ((const char *)s1, (const char *)s2); 698} 699 700/* Like make_string_hash_table, but uses string_hash_nocase and 701 string_cmp_nocase. */ 702 703struct hash_table * 704make_nocase_string_hash_table (int items) 705{ 706 return hash_table_new (items, hash_string_nocase, string_cmp_nocase); 707} 708 709/* Hashing of numeric values, such as pointers and integers. 710 711 This implementation is the Robert Jenkins' 32 bit Mix Function, 712 with a simple adaptation for 64-bit values. According to Jenkins 713 it should offer excellent spreading of values. Unlike the popular 714 Knuth's multiplication hash, this function doesn't need to know the 715 hash table size to work. */ 716 717unsigned long 718hash_pointer (const void *ptr) 719{ 720 uintptr_t key = (uintptr_t) ptr; 721 key += (key << 12); 722 key ^= (key >> 22); 723 key += (key << 4); 724 key ^= (key >> 9); 725 key += (key << 10); 726 key ^= (key >> 2); 727 key += (key << 7); 728 key ^= (key >> 12); 729#if SIZEOF_VOID_P > 4 730 key += (key << 44); 731 key ^= (key >> 54); 732 key += (key << 36); 733 key ^= (key >> 41); 734 key += (key << 42); 735 key ^= (key >> 34); 736 key += (key << 39); 737 key ^= (key >> 44); 738#endif 739 return (unsigned long) key; 740} 741 742static int 743cmp_pointer (const void *ptr1, const void *ptr2) 744{ 745 return ptr1 == ptr2; 746} 747 748#ifdef TEST 749 750#include <stdio.h> 751#include <string.h> 752 753void 754print_hash (struct hash_table *sht) 755{ 756 hash_table_iterator iter; 757 int count = 0; 758 759 for (hash_table_iterate (sht, &iter); hash_table_iter_next (&iter); 760 ++count) 761 printf ("%s: %s\n", iter.key, iter.value); 762 assert (count == sht->count); 763} 764 765int 766main (void) 767{ 768 struct hash_table *ht = make_string_hash_table (0); 769 char line[80]; 770 while ((fgets (line, sizeof (line), stdin))) 771 { 772 int len = strlen (line); 773 if (len <= 1) 774 continue; 775 line[--len] = '\0'; 776 if (!hash_table_contains (ht, line)) 777 hash_table_put (ht, strdup (line), "here I am!"); 778#if 1 779 if (len % 5 == 0) 780 { 781 char *line_copy; 782 if (hash_table_get_pair (ht, line, &line_copy, NULL)) 783 { 784 hash_table_remove (ht, line); 785 xfree (line_copy); 786 } 787 } 788#endif 789 } 790#if 0 791 print_hash (ht); 792#endif 793#if 1 794 printf ("%d %d\n", ht->count, ht->size); 795#endif 796 return 0; 797} 798#endif /* TEST */ 799