158551Skris/* Hash table for checking keyword links.  Implemented using double hashing.
2230237Sbapt   Copyright (C) 1989-1998, 2000, 2002 Free Software Foundation, Inc.
3230237Sbapt   Written by Douglas C. Schmidt <schmidt@ics.uci.edu>
4230237Sbapt   and Bruno Haible <bruno@clisp.org>.
558551Skris
6230237Sbapt   This file is part of GNU GPERF.
758551Skris
8230237Sbapt   GNU GPERF is free software; you can redistribute it and/or modify
9230237Sbapt   it under the terms of the GNU General Public License as published by
10230237Sbapt   the Free Software Foundation; either version 2, or (at your option)
11230237Sbapt   any later version.
1258551Skris
13230237Sbapt   GNU GPERF is distributed in the hope that it will be useful,
14230237Sbapt   but WITHOUT ANY WARRANTY; without even the implied warranty of
15230237Sbapt   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16230237Sbapt   GNU General Public License for more details.
1758551Skris
18230237Sbapt   You should have received a copy of the GNU General Public License
19230237Sbapt   along with this program; see the file COPYING.
20230237Sbapt   If not, write to the Free Software Foundation, Inc.,
21230237Sbapt   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
2258551Skris
23230237Sbapt/* Specification. */
2458551Skris#include "hash-table.h"
2558551Skris
2658551Skris#include <stdio.h>
2758551Skris#include <string.h> /* declares memset(), strcmp() */
2858551Skris#include <hash.h>
2958551Skris#include "options.h"
3058551Skris
31230237Sbapt/* We use a hash table with double hashing.  This is the simplest kind of
32230237Sbapt   hash table, given that we always only insert and never remove entries
33230237Sbapt   from the hash table.  */
3458551Skris
35230237Sbapt/* To make double hashing efficient, there need to be enough spare entries.  */
36230237Sbaptstatic const int size_factor = 10;
37230237Sbapt
38230237Sbapt/* We make the size of the hash table a power of 2.  This allows for two
39230237Sbapt   optimizations: It eliminates the modulo instruction, and allows for an
40230237Sbapt   easy secondary hashing function.  */
41230237Sbapt
42230237Sbapt/* Constructor.  */
43230237SbaptHash_Table::Hash_Table (unsigned int size, bool ignore_length)
44230237Sbapt  : _ignore_length (ignore_length),
45230237Sbapt    _collisions (0)
4658551Skris{
47230237Sbapt  /* There need to be enough spare entries.  */
48230237Sbapt  size = size * size_factor;
49230237Sbapt
50230237Sbapt  /* Find smallest power of 2 that is >= size.  */
51230237Sbapt  unsigned int shift = 0;
52230237Sbapt  if ((size >> 16) > 0)
53230237Sbapt    {
54230237Sbapt      size = size >> 16;
55230237Sbapt      shift += 16;
56230237Sbapt    }
57230237Sbapt  if ((size >> 8) > 0)
58230237Sbapt    {
59230237Sbapt      size = size >> 8;
60230237Sbapt      shift += 8;
61230237Sbapt    }
62230237Sbapt  if ((size >> 4) > 0)
63230237Sbapt    {
64230237Sbapt      size = size >> 4;
65230237Sbapt      shift += 4;
66230237Sbapt    }
67230237Sbapt  if ((size >> 2) > 0)
68230237Sbapt    {
69230237Sbapt      size = size >> 2;
70230237Sbapt      shift += 2;
71230237Sbapt    }
72230237Sbapt  if ((size >> 1) > 0)
73230237Sbapt    {
74230237Sbapt      size = size >> 1;
75230237Sbapt      shift += 1;
76230237Sbapt    }
77230237Sbapt  _log_size = shift;
78230237Sbapt  _size = 1 << shift;
79230237Sbapt
80230237Sbapt  /* Allocate table.  */
81230237Sbapt  _table = new KeywordExt*[_size];
82230237Sbapt  memset (_table, 0, _size * sizeof (*_table));
8358551Skris}
8458551Skris
85230237Sbapt/* Destructor.  */
86230237SbaptHash_Table::~Hash_Table ()
8758551Skris{
88230237Sbapt  delete[] _table;
89230237Sbapt}
9058551Skris
91230237Sbapt/* Print the table's contents.  */
92230237Sbaptvoid
93230237SbaptHash_Table::dump () const
94230237Sbapt{
95230237Sbapt  int field_width;
9658551Skris
97230237Sbapt  field_width = 0;
98230237Sbapt  {
99230237Sbapt    for (int i = _size - 1; i >= 0; i--)
100230237Sbapt      if (_table[i])
101230237Sbapt        if (field_width < _table[i]->_selchars_length)
102230237Sbapt          field_width = _table[i]->_selchars_length;
103230237Sbapt  }
10458551Skris
105230237Sbapt  fprintf (stderr,
106230237Sbapt           "\ndumping the hash table\n"
107230237Sbapt           "total available table slots = %d, total bytes = %d, total collisions = %d\n"
108230237Sbapt           "location, %*s, keyword\n",
109230237Sbapt           _size, _size * static_cast<unsigned int>(sizeof (*_table)),
110230237Sbapt           _collisions, field_width, "keysig");
111230237Sbapt
112230237Sbapt  for (int i = _size - 1; i >= 0; i--)
113230237Sbapt    if (_table[i])
114230237Sbapt      {
115230237Sbapt        fprintf (stderr, "%8d, ", i);
116230237Sbapt        if (field_width > _table[i]->_selchars_length)
117230237Sbapt          fprintf (stderr, "%*s", field_width - _table[i]->_selchars_length, "");
118230237Sbapt        for (int j = 0; j < _table[i]->_selchars_length; j++)
119230237Sbapt          putc (_table[i]->_selchars[j], stderr);
120230237Sbapt        fprintf (stderr, ", %.*s\n",
121230237Sbapt                 _table[i]->_allchars_length, _table[i]->_allchars);
122230237Sbapt      }
123230237Sbapt
124230237Sbapt  fprintf (stderr, "\nend dumping hash table\n\n");
12558551Skris}
12658551Skris
127230237Sbapt/* Compares two items.  */
128230237Sbaptinline bool
129230237SbaptHash_Table::equal (KeywordExt *item1, KeywordExt *item2) const
130230237Sbapt{
131230237Sbapt  return item1->_selchars_length == item2->_selchars_length
132230237Sbapt         && memcmp (item1->_selchars, item2->_selchars,
133230237Sbapt                    item2->_selchars_length * sizeof (unsigned int))
134230237Sbapt            == 0
135230237Sbapt         && (_ignore_length
136230237Sbapt             || item1->_allchars_length == item2->_allchars_length);
137230237Sbapt}
13858551Skris
139230237Sbapt/* Attempts to insert ITEM in the table.  If there is already an equal
140230237Sbapt   entry in it, returns it.  Otherwise inserts ITEM and returns NULL.  */
141230237SbaptKeywordExt *
142230237SbaptHash_Table::insert (KeywordExt *item)
14358551Skris{
144230237Sbapt  unsigned hash_val =
145230237Sbapt    hashpjw (reinterpret_cast<const unsigned char *>(item->_selchars),
146230237Sbapt             item->_selchars_length * sizeof (unsigned int));
147230237Sbapt  unsigned int probe = hash_val & (_size - 1);
148230237Sbapt  unsigned int increment =
149230237Sbapt    (((hash_val >> _log_size)
150230237Sbapt      ^ (_ignore_length ? 0 : item->_allchars_length))
151230237Sbapt     << 1) + 1;
152230237Sbapt  /* Note that because _size is a power of 2 and increment is odd,
153230237Sbapt     we have gcd(increment,_size) = 1, which guarantees that we'll find
154230237Sbapt     an empty entry during the loop.  */
15558551Skris
156230237Sbapt  while (_table[probe] != NULL)
15758551Skris    {
158230237Sbapt      if (equal (_table[probe], item))
159230237Sbapt        return _table[probe];
16067064Sobrien
161230237Sbapt      _collisions++;
162230237Sbapt      probe = (probe + increment) & (_size - 1);
16358551Skris    }
16458551Skris
165230237Sbapt  _table[probe] = item;
166230237Sbapt  return NULL;
16758551Skris}
168