158551Skris/* Hash table for checking keyword links. Implemented using double hashing. 2230237Sbapt Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc. 3230237Sbapt Written by Douglas C. Schmidt <schmidt@ics.uci.edu> 4230237Sbapt and Bruno Haible <bruno@clisp.org>. 558551Skris 6230237Sbapt This file is part of GNU GPERF. 758551Skris 8230237Sbapt GNU GPERF is free software; you can redistribute it and/or modify 9230237Sbapt it under the terms of the GNU General Public License as published by 10230237Sbapt the Free Software Foundation; either version 2, or (at your option) 11230237Sbapt any later version. 1258551Skris 13230237Sbapt GNU GPERF is distributed in the hope that it will be useful, 14230237Sbapt but WITHOUT ANY WARRANTY; without even the implied warranty of 15230237Sbapt MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16230237Sbapt GNU General Public License for more details. 1758551Skris 18230237Sbapt You should have received a copy of the GNU General Public License 19230237Sbapt along with this program; see the file COPYING. 20230237Sbapt If not, write to the Free Software Foundation, Inc., 21230237Sbapt 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 2258551Skris 23230237Sbapt/* Specification. */ 2458551Skris#include "hash-table.h" 2558551Skris 2658551Skris#include <stdio.h> 2758551Skris#include <string.h> /* declares memset(), strcmp() */ 2858551Skris#include <hash.h> 2958551Skris#include "options.h" 3058551Skris 31230237Sbapt/* We use a hash table with double hashing. This is the simplest kind of 32230237Sbapt hash table, given that we always only insert and never remove entries 33230237Sbapt from the hash table. */ 3458551Skris 35230237Sbapt/* To make double hashing efficient, there need to be enough spare entries. */ 36230237Sbaptstatic const int size_factor = 10; 37230237Sbapt 38230237Sbapt/* We make the size of the hash table a power of 2. This allows for two 39230237Sbapt optimizations: It eliminates the modulo instruction, and allows for an 40230237Sbapt easy secondary hashing function. */ 41230237Sbapt 42230237Sbapt/* Constructor. */ 43230237SbaptHash_Table::Hash_Table (unsigned int size, bool ignore_length) 44230237Sbapt : _ignore_length (ignore_length), 45230237Sbapt _collisions (0) 4658551Skris{ 47230237Sbapt /* There need to be enough spare entries. */ 48230237Sbapt size = size * size_factor; 49230237Sbapt 50230237Sbapt /* Find smallest power of 2 that is >= size. */ 51230237Sbapt unsigned int shift = 0; 52230237Sbapt if ((size >> 16) > 0) 53230237Sbapt { 54230237Sbapt size = size >> 16; 55230237Sbapt shift += 16; 56230237Sbapt } 57230237Sbapt if ((size >> 8) > 0) 58230237Sbapt { 59230237Sbapt size = size >> 8; 60230237Sbapt shift += 8; 61230237Sbapt } 62230237Sbapt if ((size >> 4) > 0) 63230237Sbapt { 64230237Sbapt size = size >> 4; 65230237Sbapt shift += 4; 66230237Sbapt } 67230237Sbapt if ((size >> 2) > 0) 68230237Sbapt { 69230237Sbapt size = size >> 2; 70230237Sbapt shift += 2; 71230237Sbapt } 72230237Sbapt if ((size >> 1) > 0) 73230237Sbapt { 74230237Sbapt size = size >> 1; 75230237Sbapt shift += 1; 76230237Sbapt } 77230237Sbapt _log_size = shift; 78230237Sbapt _size = 1 << shift; 79230237Sbapt 80230237Sbapt /* Allocate table. */ 81230237Sbapt _table = new KeywordExt*[_size]; 82230237Sbapt memset (_table, 0, _size * sizeof (*_table)); 8358551Skris} 8458551Skris 85230237Sbapt/* Destructor. */ 86230237SbaptHash_Table::~Hash_Table () 8758551Skris{ 88230237Sbapt delete[] _table; 89230237Sbapt} 9058551Skris 91230237Sbapt/* Print the table's contents. */ 92230237Sbaptvoid 93230237SbaptHash_Table::dump () const 94230237Sbapt{ 95230237Sbapt int field_width; 9658551Skris 97230237Sbapt field_width = 0; 98230237Sbapt { 99230237Sbapt for (int i = _size - 1; i >= 0; i--) 100230237Sbapt if (_table[i]) 101230237Sbapt if (field_width < _table[i]->_selchars_length) 102230237Sbapt field_width = _table[i]->_selchars_length; 103230237Sbapt } 10458551Skris 105230237Sbapt fprintf (stderr, 106230237Sbapt "\ndumping the hash table\n" 107230237Sbapt "total available table slots = %d, total bytes = %d, total collisions = %d\n" 108230237Sbapt "location, %*s, keyword\n", 109230237Sbapt _size, _size * static_cast<unsigned int>(sizeof (*_table)), 110230237Sbapt _collisions, field_width, "keysig"); 111230237Sbapt 112230237Sbapt for (int i = _size - 1; i >= 0; i--) 113230237Sbapt if (_table[i]) 114230237Sbapt { 115230237Sbapt fprintf (stderr, "%8d, ", i); 116230237Sbapt if (field_width > _table[i]->_selchars_length) 117230237Sbapt fprintf (stderr, "%*s", field_width - _table[i]->_selchars_length, ""); 118230237Sbapt for (int j = 0; j < _table[i]->_selchars_length; j++) 119230237Sbapt putc (_table[i]->_selchars[j], stderr); 120230237Sbapt fprintf (stderr, ", %.*s\n", 121230237Sbapt _table[i]->_allchars_length, _table[i]->_allchars); 122230237Sbapt } 123230237Sbapt 124230237Sbapt fprintf (stderr, "\nend dumping hash table\n\n"); 12558551Skris} 12658551Skris 127230237Sbapt/* Compares two items. */ 128230237Sbaptinline bool 129230237SbaptHash_Table::equal (KeywordExt *item1, KeywordExt *item2) const 130230237Sbapt{ 131230237Sbapt return item1->_selchars_length == item2->_selchars_length 132230237Sbapt && memcmp (item1->_selchars, item2->_selchars, 133230237Sbapt item2->_selchars_length * sizeof (unsigned int)) 134230237Sbapt == 0 135230237Sbapt && (_ignore_length 136230237Sbapt || item1->_allchars_length == item2->_allchars_length); 137230237Sbapt} 13858551Skris 139230237Sbapt/* Attempts to insert ITEM in the table. If there is already an equal 140230237Sbapt entry in it, returns it. Otherwise inserts ITEM and returns NULL. */ 141230237SbaptKeywordExt * 142230237SbaptHash_Table::insert (KeywordExt *item) 14358551Skris{ 144230237Sbapt unsigned hash_val = 145230237Sbapt hashpjw (reinterpret_cast<const unsigned char *>(item->_selchars), 146230237Sbapt item->_selchars_length * sizeof (unsigned int)); 147230237Sbapt unsigned int probe = hash_val & (_size - 1); 148230237Sbapt unsigned int increment = 149230237Sbapt (((hash_val >> _log_size) 150230237Sbapt ^ (_ignore_length ? 0 : item->_allchars_length)) 151230237Sbapt << 1) + 1; 152230237Sbapt /* Note that because _size is a power of 2 and increment is odd, 153230237Sbapt we have gcd(increment,_size) = 1, which guarantees that we'll find 154230237Sbapt an empty entry during the loop. */ 15558551Skris 156230237Sbapt while (_table[probe] != NULL) 15758551Skris { 158230237Sbapt if (equal (_table[probe], item)) 159230237Sbapt return _table[probe]; 16067064Sobrien 161230237Sbapt _collisions++; 162230237Sbapt probe = (probe + increment) & (_size - 1); 16358551Skris } 16458551Skris 165230237Sbapt _table[probe] = item; 166230237Sbapt return NULL; 16758551Skris} 168