1/* bucket.c - The routines for playing with hash buckets. */
2
3/*  This file is part of GDBM, the GNU data base manager, by Philip A. Nelson.
4    Copyright (C) 1990, 1991, 1993  Free Software Foundation, Inc.
5
6    GDBM is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10
11    GDBM is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with GDBM; see the file COPYING.  If not, write to
18    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
19
20    You may contact the author by:
21       e-mail:  phil@cs.wwu.edu
22      us-mail:  Philip A. Nelson
23                Computer Science Department
24                Western Washington University
25                Bellingham, WA 98226
26
27*************************************************************************/
28
29
30/* include system configuration before all else. */
31#include "autoconf.h"
32
33#include "gdbmdefs.h"
34
35
36/* Initializing a new hash buckets sets all bucket entries to -1 hash value. */
37void
38_gdbm_new_bucket (dbf, bucket, bits)
39     gdbm_file_info *dbf;
40     hash_bucket *bucket;
41     int bits;
42{
43  int index;
44
45  /* Initialize the avail block. */
46  bucket->av_count = 0;
47
48  /* Set the information fields first. */
49  bucket->bucket_bits = bits;
50  bucket->count = 0;
51
52  /* Initialize all bucket elements. */
53  for (index = 0; index < dbf->header->bucket_elems; index++)
54    bucket->h_table[index].hash_value = -1;
55}
56
57
58
59/* Find a bucket for DBF that is pointed to by the bucket directory from
60   location DIR_INDEX.   The bucket cache is first checked to see if it
61   is already in memory.  If not, a bucket may be tossed to read the new
62   bucket.  In any case, the requested bucket is make the "current" bucket
63   and dbf->bucket points to the correct bucket. */
64
65void
66_gdbm_get_bucket (dbf, dir_index)
67     gdbm_file_info *dbf;
68     int dir_index;
69{
70  off_t bucket_adr;	/* The address of the correct hash bucket.  */
71  int   num_bytes;	/* The number of bytes read. */
72  off_t	file_pos;	/* The return address for lseek. */
73  register int index;	/* Loop index. */
74
75  /* Initial set up. */
76  dbf->bucket_dir = dir_index;
77  bucket_adr = dbf->dir [dir_index];
78
79  if (dbf->bucket_cache == NULL)
80    {
81      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
82        _gdbm_fatal(dbf, "couldn't init cache");
83    }
84
85  /* Is that one is not already current, we must find it. */
86  if (dbf->cache_entry->ca_adr != bucket_adr)
87    {
88      /* Look in the cache. */
89      for (index = 0; index < dbf->cache_size; index++)
90        {
91	  if (dbf->bucket_cache[index].ca_adr == bucket_adr)
92	    {
93	      dbf->bucket = dbf->bucket_cache[index].ca_bucket;
94	      dbf->cache_entry = &dbf->bucket_cache[index];
95	      return;
96	    }
97        }
98
99      /* It is not in the cache, read it from the disk. */
100      dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
101      if (dbf->bucket_cache[dbf->last_read].ca_changed)
102	_gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]);
103      dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr;
104      dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket;
105      dbf->cache_entry = &dbf->bucket_cache[dbf->last_read];
106      dbf->cache_entry->ca_data.elem_loc = -1;
107      dbf->cache_entry->ca_changed = FALSE;
108
109      /* Read the bucket. */
110      file_pos = lseek (dbf->desc, bucket_adr, L_SET);
111      if (file_pos != bucket_adr)
112	_gdbm_fatal (dbf, "lseek error");
113
114      num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size);
115      if (num_bytes != dbf->header->bucket_size)
116	_gdbm_fatal (dbf, "read error");
117    }
118
119  return;
120}
121
122
123/* Split the current bucket.  This includes moving all items in the bucket to
124   a new bucket.  This doesn't require any disk reads because all hash values
125   are stored in the buckets.  Splitting the current bucket may require
126   doubling the size of the hash directory.  */
127void
128_gdbm_split_bucket (dbf, next_insert)
129     gdbm_file_info *dbf;
130     int next_insert;
131{
132  hash_bucket *bucket[2]; 	/* Pointers to the new buckets. */
133
134  int          new_bits;	/* The number of bits for the new buckets. */
135  int	       cache_0;		/* Location in the cache for the buckets. */
136  int	       cache_1;
137  off_t        adr_0;		/* File address of the new bucket 0. */
138  off_t        adr_1;		/* File address of the new bucket 1. */
139  avail_elem   old_bucket;	/* Avail Struct for the old bucket. */
140
141  off_t        dir_start0;	/* Used in updating the directory. */
142  off_t        dir_start1;
143  off_t        dir_end;
144
145  off_t       *new_dir;		/* Pointer to the new directory. */
146  off_t        dir_adr; 	/* Address of the new directory. */
147  int          dir_size;	/* Size of the new directory. */
148  off_t        old_adr[31]; 	/* Address of the old directories. */
149  int          old_size[31]; 	/* Size of the old directories. */
150  int	       old_count;	/* Number of old directories. */
151
152  int          index;		/* Used in array indexing. */
153  int          index1;		/* Used in array indexing. */
154  int          elem_loc;	/* Location in new bucket to put element. */
155  bucket_element *old_el;	/* Pointer into the old bucket. */
156  int	       select;		/* Used to index bucket during movement. */
157
158
159  /* No directories are yet old. */
160  old_count = 0;
161
162  if (dbf->bucket_cache == NULL)
163    {
164      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
165        _gdbm_fatal(dbf, "couldn't init cache");
166    }
167
168  while (dbf->bucket->count == dbf->header->bucket_elems)
169    {
170      /* Initialize the "new" buckets in the cache. */
171      do
172	{
173	  dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
174	  cache_0 = dbf->last_read;
175	}
176      while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
177      bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
178      if (dbf->bucket_cache[cache_0].ca_changed)
179	_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
180      do
181	{
182	  dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
183	  cache_1 = dbf->last_read;
184	}
185      while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
186      bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
187      if (dbf->bucket_cache[cache_1].ca_changed)
188	_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
189      new_bits = dbf->bucket->bucket_bits+1;
190      _gdbm_new_bucket (dbf, bucket[0], new_bits);
191      _gdbm_new_bucket (dbf, bucket[1], new_bits);
192      adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
193      dbf->bucket_cache[cache_0].ca_adr = adr_0;
194      adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
195      dbf->bucket_cache[cache_1].ca_adr = adr_1;
196
197
198
199      /* Double the directory size if necessary. */
200      if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
201	{
202	  dir_size = dbf->header->dir_size * 2;
203	  dir_adr  = _gdbm_alloc (dbf, dir_size);
204	  new_dir  = (off_t *) malloc (dir_size);
205	  if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error");
206	  for (index = 0;
207	  	index < dbf->header->dir_size/sizeof (off_t); index++)
208	    {
209	      new_dir[2*index]   = dbf->dir[index];
210	      new_dir[2*index+1] = dbf->dir[index];
211	    }
212
213	  /* Update header. */
214	  old_adr[old_count] = dbf->header->dir;
215	  dbf->header->dir = dir_adr;
216	  old_size[old_count] = dbf->header->dir_size;
217	  dbf->header->dir_size = dir_size;
218	  dbf->header->dir_bits = new_bits;
219	  old_count++;
220
221	  /* Now update dbf.  */
222	  dbf->header_changed = TRUE;
223	  dbf->bucket_dir *= 2;
224	  free (dbf->dir);
225	  dbf->dir = new_dir;
226	}
227
228      /* Copy all elements in dbf->bucket into the new buckets. */
229      for (index = 0; index < dbf->header->bucket_elems; index++)
230	{
231	  old_el = & (dbf->bucket->h_table[index]);
232	  select = (old_el->hash_value >> (31-new_bits)) & 1;
233	  elem_loc = old_el->hash_value % dbf->header->bucket_elems;
234	  while (bucket[select]->h_table[elem_loc].hash_value != -1)
235	    elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
236	  bucket[select]->h_table[elem_loc] = *old_el;
237	  bucket[select]->count += 1;
238	}
239
240      /* Allocate avail space for the bucket[1]. */
241      bucket[1]->bucket_avail[0].av_adr
242	= _gdbm_alloc (dbf, dbf->header->block_size);
243      bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
244      bucket[1]->av_count = 1;
245
246      /* Copy the avail elements in dbf->bucket to bucket[0]. */
247      bucket[0]->av_count = dbf->bucket->av_count;
248      index = 0;
249      index1 = 0;
250      if (bucket[0]->av_count == BUCKET_AVAIL)
251	{
252	  /* The avail is full, move the first one to bucket[1]. */
253	  _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
254			     bucket[1]->bucket_avail,
255			     &bucket[1]->av_count, FALSE);
256	  index = 1;
257	  bucket[0]->av_count --;
258	}
259      for (; index < dbf->bucket->av_count; index++)
260	{
261	  bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
262	}
263
264      /* Update the directory.  We have new file addresses for both buckets. */
265      dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
266      dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
267      dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
268      dir_start0 = dir_start1 - (dir_end - dir_start1);
269      for (index = dir_start0; index < dir_start1; index++)
270	dbf->dir[index] = adr_0;
271      for (index = dir_start1; index < dir_end; index++)
272	dbf->dir[index] = adr_1;
273
274
275      /* Set changed flags. */
276      dbf->bucket_cache[cache_0].ca_changed = TRUE;
277      dbf->bucket_cache[cache_1].ca_changed = TRUE;
278      dbf->bucket_changed = TRUE;
279      dbf->directory_changed = TRUE;
280      dbf->second_changed = TRUE;
281
282      /* Update the cache! */
283      dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);
284
285      /* Invalidate old cache entry. */
286      old_bucket.av_adr  = dbf->cache_entry->ca_adr;
287      old_bucket.av_size = dbf->header->bucket_size;
288      dbf->cache_entry->ca_adr = 0;
289      dbf->cache_entry->ca_changed = FALSE;
290
291      /* Set dbf->bucket to the proper bucket. */
292      if (dbf->dir[dbf->bucket_dir] == adr_0)
293	{
294	  dbf->bucket = bucket[0];
295	  dbf->cache_entry = &dbf->bucket_cache[cache_0];
296	  _gdbm_put_av_elem (old_bucket,
297			     bucket[1]->bucket_avail,
298			     &bucket[1]->av_count, FALSE);
299	}
300      else
301	{
302	  dbf->bucket = bucket[1];
303	  dbf->cache_entry = &dbf->bucket_cache[cache_1];
304	  _gdbm_put_av_elem (old_bucket,
305			     bucket[0]->bucket_avail,
306			     &bucket[0]->av_count, FALSE);
307	}
308
309    }
310
311  /* Get rid of old directories. */
312  for (index = 0; index < old_count; index++)
313    _gdbm_free (dbf, old_adr[index], old_size[index]);
314}
315
316
317/* The only place where a bucket is written.  CA_ENTRY is the
318   cache entry containing the bucket to be written. */
319
320void
321_gdbm_write_bucket (dbf, ca_entry)
322     gdbm_file_info *dbf;
323     cache_elem *ca_entry;
324{
325  int  num_bytes;	/* The return value for write. */
326  off_t file_pos;	/* The return value for lseek. */
327
328  file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET);
329  if (file_pos != ca_entry->ca_adr)
330    _gdbm_fatal (dbf, "lseek error");
331  num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size);
332  if (num_bytes != dbf->header->bucket_size)
333    _gdbm_fatal (dbf, "write error");
334  ca_entry->ca_changed = FALSE;
335  ca_entry->ca_data.hash_val = -1;
336  ca_entry->ca_data.elem_loc = -1;
337}
338