158551Skris/* Hash table for checking keyword links. Implemented using double hashing. 2228060Sbapt Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. 3228060Sbapt Written by Douglas C. Schmidt <schmidt@ics.uci.edu> 4228060Sbapt and Bruno Haible <bruno@clisp.org>. 558551Skris 6228060Sbapt This file is part of GNU GPERF. 758551Skris 8228060Sbapt GNU GPERF is free software; you can redistribute it and/or modify 9228060Sbapt it under the terms of the GNU General Public License as published by 10228060Sbapt the Free Software Foundation; either version 2, or (at your option) 11228060Sbapt any later version. 1258551Skris 13228060Sbapt GNU GPERF is distributed in the hope that it will be useful, 14228060Sbapt but WITHOUT ANY WARRANTY; without even the implied warranty of 15228060Sbapt MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16228060Sbapt GNU General Public License for more details. 1758551Skris 18228060Sbapt You should have received a copy of the GNU General Public License 19228060Sbapt along with this program; see the file COPYING. 20228060Sbapt If not, write to the Free Software Foundation, Inc., 21228060Sbapt 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 2258551Skris 23228060Sbapt/* Specification. */ 2458551Skris#include "hash-table.h" 2558551Skris 2658551Skris#include <stdio.h> 2758551Skris#include <string.h> /* declares memset(), strcmp() */ 2858551Skris#include <hash.h> 2958551Skris#include "options.h" 3058551Skris 31228060Sbapt/* We use a hash table with double hashing. This is the simplest kind of 32228060Sbapt hash table, given that we always only insert and never remove entries 33228060Sbapt from the hash table. */ 3458551Skris 35228060Sbapt/* To make double hashing efficient, there need to be enough spare entries. */ 36228060Sbaptstatic const int size_factor = 10; 37228060Sbapt 38228060Sbapt/* We make the size of the hash table a power of 2. This allows for two 39228060Sbapt optimizations: It eliminates the modulo instruction, and allows for an 40228060Sbapt easy secondary hashing function. */ 41228060Sbapt 42228060Sbapt/* Constructor. */ 43228060SbaptHash_Table::Hash_Table (unsigned int size, bool ignore_length) 44228060Sbapt : _ignore_length (ignore_length), 45228060Sbapt _collisions (0) 4658551Skris{ 47228060Sbapt /* There need to be enough spare entries. */ 48228060Sbapt size = size * size_factor; 49228060Sbapt 50228060Sbapt /* Find smallest power of 2 that is >= size. */ 51228060Sbapt unsigned int shift = 0; 52228060Sbapt if ((size >> 16) > 0) 53228060Sbapt { 54228060Sbapt size = size >> 16; 55228060Sbapt shift += 16; 56228060Sbapt } 57228060Sbapt if ((size >> 8) > 0) 58228060Sbapt { 59228060Sbapt size = size >> 8; 60228060Sbapt shift += 8; 61228060Sbapt } 62228060Sbapt if ((size >> 4) > 0) 63228060Sbapt { 64228060Sbapt size = size >> 4; 65228060Sbapt shift += 4; 66228060Sbapt } 67228060Sbapt if ((size >> 2) > 0) 68228060Sbapt { 69228060Sbapt size = size >> 2; 70228060Sbapt shift += 2; 71228060Sbapt } 72228060Sbapt if ((size >> 1) > 0) 73228060Sbapt { 74228060Sbapt size = size >> 1; 75228060Sbapt shift += 1; 76228060Sbapt } 77228060Sbapt _log_size = shift; 78228060Sbapt _size = 1 << shift; 79228060Sbapt 80228060Sbapt /* Allocate table. */ 81228060Sbapt _table = new KeywordExt*[_size]; 82228060Sbapt memset (_table, 0, _size * sizeof (*_table)); 8358551Skris} 8458551Skris 85228060Sbapt/* Destructor. */ 86228060SbaptHash_Table::~Hash_Table () 8758551Skris{ 88228060Sbapt delete[] _table; 89228060Sbapt} 9058551Skris 91228060Sbapt/* Print the table's contents. */ 92228060Sbaptvoid 93228060SbaptHash_Table::dump () const 94228060Sbapt{ 95228060Sbapt int field_width; 9658551Skris 97228060Sbapt field_width = 0; 98228060Sbapt { 99228060Sbapt for (int i = _size - 1; i >= 0; i--) 100228060Sbapt if (_table[i]) 101228060Sbapt if (field_width < _table[i]->_selchars_length) 102228060Sbapt field_width = _table[i]->_selchars_length; 103228060Sbapt } 10458551Skris 105228060Sbapt fprintf (stderr, 106228060Sbapt "\ndumping the hash table\n" 107228060Sbapt "total available table slots = %d, total bytes = %d, total collisions = %d\n" 108228060Sbapt "location, %*s, keyword\n", 109228060Sbapt _size, _size * static_cast<unsigned int>(sizeof (*_table)), 110228060Sbapt _collisions, field_width, "keysig"); 111228060Sbapt 112228060Sbapt for (int i = _size - 1; i >= 0; i--) 113228060Sbapt if (_table[i]) 114228060Sbapt { 115228060Sbapt fprintf (stderr, "%8d, ", i); 116228060Sbapt if (field_width > _table[i]->_selchars_length) 117228060Sbapt fprintf (stderr, "%*s", field_width - _table[i]->_selchars_length, ""); 118228060Sbapt for (int j = 0; j < _table[i]->_selchars_length; j++) 119228060Sbapt putc (_table[i]->_selchars[j], stderr); 120228060Sbapt fprintf (stderr, ", %.*s\n", 121228060Sbapt _table[i]->_allchars_length, _table[i]->_allchars); 122228060Sbapt } 123228060Sbapt 124228060Sbapt fprintf (stderr, "\nend dumping hash table\n\n"); 12558551Skris} 12658551Skris 127228060Sbapt/* Compares two items. */ 128228060Sbaptinline bool 129228060SbaptHash_Table::equal (KeywordExt *item1, KeywordExt *item2) const 130228060Sbapt{ 131228060Sbapt return item1->_selchars_length == item2->_selchars_length 132228060Sbapt && memcmp (item1->_selchars, item2->_selchars, 133228060Sbapt item2->_selchars_length * sizeof (unsigned int)) 134228060Sbapt == 0 135228060Sbapt && (_ignore_length 136228060Sbapt || item1->_allchars_length == item2->_allchars_length); 137228060Sbapt} 13858551Skris 139228060Sbapt/* Attempts to insert ITEM in the table. If there is already an equal 140228060Sbapt entry in it, returns it. Otherwise inserts ITEM and returns NULL. */ 141228060SbaptKeywordExt * 142228060SbaptHash_Table::insert (KeywordExt *item) 14358551Skris{ 144228060Sbapt unsigned hash_val = 145228060Sbapt hashpjw (reinterpret_cast<const unsigned char *>(item->_selchars), 146228060Sbapt item->_selchars_length * sizeof (unsigned int)); 147228060Sbapt unsigned int probe = hash_val & (_size - 1); 148228060Sbapt unsigned int increment = 149228060Sbapt (((hash_val >> _log_size) 150228060Sbapt ^ (_ignore_length ? 0 : item->_allchars_length)) 151228060Sbapt << 1) + 1; 152228060Sbapt /* Note that because _size is a power of 2 and increment is odd, 153228060Sbapt we have gcd(increment,_size) = 1, which guarantees that we'll find 154228060Sbapt an empty entry during the loop. */ 15558551Skris 156228060Sbapt while (_table[probe] != NULL) 15758551Skris { 158228060Sbapt if (equal (_table[probe], item)) 159228060Sbapt return _table[probe]; 16067064Sobrien 161228060Sbapt _collisions++; 162228060Sbapt probe = (probe + increment) & (_size - 1); 16358551Skris } 16458551Skris 165228060Sbapt _table[probe] = item; 166228060Sbapt return NULL; 16758551Skris} 168