1/* bucket.c - The routines for playing with hash buckets. */ 2 3/* This file is part of GDBM, the GNU data base manager, by Philip A. Nelson. 4 Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc. 5 6 GDBM is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GDBM is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GDBM; see the file COPYING. If not, write to 18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 19 20 You may contact the author by: 21 e-mail: phil@cs.wwu.edu 22 us-mail: Philip A. Nelson 23 Computer Science Department 24 Western Washington University 25 Bellingham, WA 98226 26 27*************************************************************************/ 28 29 30/* include system configuration before all else. */ 31#include "autoconf.h" 32 33#include "gdbmdefs.h" 34 35 36/* Initializing a new hash buckets sets all bucket entries to -1 hash value. */ 37void 38_gdbm_new_bucket (dbf, bucket, bits) 39 gdbm_file_info *dbf; 40 hash_bucket *bucket; 41 int bits; 42{ 43 int index; 44 45 /* Initialize the avail block. */ 46 bucket->av_count = 0; 47 48 /* Set the information fields first. */ 49 bucket->bucket_bits = bits; 50 bucket->count = 0; 51 52 /* Initialize all bucket elements. */ 53 for (index = 0; index < dbf->header->bucket_elems; index++) 54 bucket->h_table[index].hash_value = -1; 55} 56 57 58 59/* Find a bucket for DBF that is pointed to by the bucket directory from 60 location DIR_INDEX. The bucket cache is first checked to see if it 61 is already in memory. 
If not, a bucket may be tossed to read the new 62 bucket. In any case, the requested bucket is make the "current" bucket 63 and dbf->bucket points to the correct bucket. */ 64 65void 66_gdbm_get_bucket (dbf, dir_index) 67 gdbm_file_info *dbf; 68 int dir_index; 69{ 70 off_t bucket_adr; /* The address of the correct hash bucket. */ 71 int num_bytes; /* The number of bytes read. */ 72 off_t file_pos; /* The return address for lseek. */ 73 register int index; /* Loop index. */ 74 75 /* Initial set up. */ 76 dbf->bucket_dir = dir_index; 77 bucket_adr = dbf->dir [dir_index]; 78 79 if (dbf->bucket_cache == NULL) 80 { 81 if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1) 82 _gdbm_fatal(dbf, "couldn't init cache"); 83 } 84 85 /* Is that one is not already current, we must find it. */ 86 if (dbf->cache_entry->ca_adr != bucket_adr) 87 { 88 /* Look in the cache. */ 89 for (index = 0; index < dbf->cache_size; index++) 90 { 91 if (dbf->bucket_cache[index].ca_adr == bucket_adr) 92 { 93 dbf->bucket = dbf->bucket_cache[index].ca_bucket; 94 dbf->cache_entry = &dbf->bucket_cache[index]; 95 return; 96 } 97 } 98 99 /* It is not in the cache, read it from the disk. */ 100 dbf->last_read = (dbf->last_read + 1) % dbf->cache_size; 101 if (dbf->bucket_cache[dbf->last_read].ca_changed) 102 _gdbm_write_bucket (dbf, &dbf->bucket_cache[dbf->last_read]); 103 dbf->bucket_cache[dbf->last_read].ca_adr = bucket_adr; 104 dbf->bucket = dbf->bucket_cache[dbf->last_read].ca_bucket; 105 dbf->cache_entry = &dbf->bucket_cache[dbf->last_read]; 106 dbf->cache_entry->ca_data.elem_loc = -1; 107 dbf->cache_entry->ca_changed = FALSE; 108 109 /* Read the bucket. */ 110 file_pos = lseek (dbf->desc, bucket_adr, L_SET); 111 if (file_pos != bucket_adr) 112 _gdbm_fatal (dbf, "lseek error"); 113 114 num_bytes = read (dbf->desc, dbf->bucket, dbf->header->bucket_size); 115 if (num_bytes != dbf->header->bucket_size) 116 _gdbm_fatal (dbf, "read error"); 117 } 118 119 return; 120} 121 122 123/* Split the current bucket. 
This includes moving all items in the bucket to
   a new bucket.  This doesn't require any disk reads because all hash values
   are stored in the buckets.  Splitting the current bucket may require
   doubling the size of the hash directory.  */
void
_gdbm_split_bucket (dbf, next_insert)
     gdbm_file_info *dbf;
     int next_insert;
{
  hash_bucket *bucket[2];	/* Pointers to the new buckets. */

  int          new_bits;	/* The number of bits for the new buckets. */
  int          cache_0;		/* Location in the cache for the buckets. */
  int          cache_1;
  off_t        adr_0;		/* File address of the new bucket 0. */
  off_t        adr_1;		/* File address of the new bucket 1. */
  avail_elem   old_bucket;	/* Avail struct describing the old bucket's
				   now-reusable file space. */

  off_t        dir_start0;	/* Used in updating the directory. */
  off_t        dir_start1;
  off_t        dir_end;

  off_t       *new_dir;		/* Pointer to the new directory. */
  off_t        dir_adr;		/* Address of the new directory. */
  int          dir_size;	/* Size of the new directory. */
  off_t        old_adr[31];	/* Address of the old directories. */
  int          old_size[31];	/* Size of the old directories. */
  int          old_count;	/* Number of old directories. */

  int          index;		/* Used in array indexing. */
  int          index1;		/* Used in array indexing. */
  int          elem_loc;	/* Location in new bucket to put element. */
  bucket_element *old_el;	/* Pointer into the old bucket. */
  int          select;		/* Used to index bucket during movement. */


  /* No directories are yet old. */
  old_count = 0;

  /* Make sure the bucket cache exists before we start using it. */
  if (dbf->bucket_cache == NULL)
    {
      if(_gdbm_init_cache(dbf, DEFAULT_CACHESIZE) == -1)
	_gdbm_fatal(dbf, "couldn't init cache");
    }

  /* Keep splitting until the current bucket has room.  One split may not
     be enough if every element lands in the same half again. */
  while (dbf->bucket->count == dbf->header->bucket_elems)
    {
      /* Initialize the "new" buckets in the cache.  Each victim slot is
	 chosen round-robin, skipping whichever slot holds the bucket being
	 split, and flushed first if it has unwritten changes. */
      do
	{
	  dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
	  cache_0 = dbf->last_read;
	}
      while (dbf->bucket_cache[cache_0].ca_bucket == dbf->bucket);
      bucket[0] = dbf->bucket_cache[cache_0].ca_bucket;
      if (dbf->bucket_cache[cache_0].ca_changed)
	_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_0]);
      do
	{
	  dbf->last_read = (dbf->last_read + 1) % dbf->cache_size;
	  cache_1 = dbf->last_read;
	}
      while (dbf->bucket_cache[cache_1].ca_bucket == dbf->bucket);
      bucket[1] = dbf->bucket_cache[cache_1].ca_bucket;
      if (dbf->bucket_cache[cache_1].ca_changed)
	_gdbm_write_bucket (dbf, &dbf->bucket_cache[cache_1]);
      /* The two new buckets each describe one more bit of hash prefix
	 than the bucket they replace. */
      new_bits = dbf->bucket->bucket_bits+1;
      _gdbm_new_bucket (dbf, bucket[0], new_bits);
      _gdbm_new_bucket (dbf, bucket[1], new_bits);
      adr_0 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_0].ca_adr = adr_0;
      adr_1 = _gdbm_alloc (dbf, dbf->header->bucket_size);
      dbf->bucket_cache[cache_1].ca_adr = adr_1;



      /* Double the directory size if necessary.  Each old directory entry
	 becomes two adjacent entries pointing at the same bucket. */
      if (dbf->header->dir_bits == dbf->bucket->bucket_bits)
	{
	  dir_size = dbf->header->dir_size * 2;
	  dir_adr  = _gdbm_alloc (dbf, dir_size);
	  new_dir  = (off_t *) malloc (dir_size);
	  if (new_dir == NULL) _gdbm_fatal (dbf, "malloc error");
	  for (index = 0;
	       index < dbf->header->dir_size/sizeof (off_t); index++)
	    {
	      new_dir[2*index]   = dbf->dir[index];
	      new_dir[2*index+1] = dbf->dir[index];
	    }

	  /* Update header.  The old on-disk directory is remembered so it
	     can be freed once the split loop finishes. */
	  old_adr[old_count] = dbf->header->dir;
	  dbf->header->dir = dir_adr;
	  old_size[old_count] = dbf->header->dir_size;
	  dbf->header->dir_size = dir_size;
	  dbf->header->dir_bits = new_bits;
	  old_count++;

	  /* Now update dbf. */
	  dbf->header_changed = TRUE;
	  dbf->bucket_dir *= 2;
	  free (dbf->dir);
	  dbf->dir = new_dir;
	}

      /* Copy all elements in dbf->bucket into the new buckets.  Bit
	 (31-new_bits) of the hash value decides which new bucket gets the
	 element; collisions within a bucket are resolved by linear
	 probing, as when the element was first inserted. */
      for (index = 0; index < dbf->header->bucket_elems; index++)
	{
	  old_el = & (dbf->bucket->h_table[index]);
	  select = (old_el->hash_value >> (31-new_bits)) & 1;
	  elem_loc = old_el->hash_value % dbf->header->bucket_elems;
	  while (bucket[select]->h_table[elem_loc].hash_value != -1)
	    elem_loc = (elem_loc + 1) % dbf->header->bucket_elems;
	  bucket[select]->h_table[elem_loc] = *old_el;
	  bucket[select]->count += 1;
	}

      /* Allocate avail space for the bucket[1]. */
      bucket[1]->bucket_avail[0].av_adr
	= _gdbm_alloc (dbf, dbf->header->block_size);
      bucket[1]->bucket_avail[0].av_size = dbf->header->block_size;
      bucket[1]->av_count = 1;

      /* Copy the avail elements in dbf->bucket to bucket[0]. */
      bucket[0]->av_count = dbf->bucket->av_count;
      index = 0;
      index1 = 0;
      if (bucket[0]->av_count == BUCKET_AVAIL)
	{
	  /* The avail is full, move the first one to bucket[1]. */
	  _gdbm_put_av_elem (dbf->bucket->bucket_avail[0],
			     bucket[1]->bucket_avail,
			     &bucket[1]->av_count, FALSE);
	  index = 1;
	  bucket[0]->av_count --;
	}
      for (; index < dbf->bucket->av_count; index++)
	{
	  bucket[0]->bucket_avail[index1++] = dbf->bucket->bucket_avail[index];
	}

      /* Update the directory.  We have new file addresses for both buckets.
	 [dir_start0, dir_start1) maps to bucket 0 and [dir_start1, dir_end)
	 maps to bucket 1. */
      dir_start1 = (dbf->bucket_dir >> (dbf->header->dir_bits - new_bits)) | 1;
      dir_end = (dir_start1 + 1) << (dbf->header->dir_bits - new_bits);
      dir_start1 = dir_start1 << (dbf->header->dir_bits - new_bits);
      dir_start0 = dir_start1 - (dir_end - dir_start1);
      for (index = dir_start0; index < dir_start1; index++)
	dbf->dir[index] = adr_0;
      for (index = dir_start1; index < dir_end; index++)
	dbf->dir[index] = adr_1;


      /* Set changed flags. */
      dbf->bucket_cache[cache_0].ca_changed = TRUE;
      dbf->bucket_cache[cache_1].ca_changed = TRUE;
      dbf->bucket_changed = TRUE;
      dbf->directory_changed = TRUE;
      dbf->second_changed = TRUE;

      /* Update the cache!  Pick the directory slot for the hash value that
	 is about to be inserted (hash values occupy 31 bits). */
      dbf->bucket_dir = next_insert >> (31-dbf->header->dir_bits);

      /* Invalidate old cache entry.  Its file space is recorded in
	 old_bucket so it can be returned to an avail list below. */
      old_bucket.av_adr  = dbf->cache_entry->ca_adr;
      old_bucket.av_size = dbf->header->bucket_size;
      dbf->cache_entry->ca_adr = 0;
      dbf->cache_entry->ca_changed = FALSE;

      /* Set dbf->bucket to the proper bucket.  The old bucket's file space
	 is donated to the avail list of the bucket we are NOT keeping
	 current. */
      if (dbf->dir[dbf->bucket_dir] == adr_0)
	{
	  dbf->bucket = bucket[0];
	  dbf->cache_entry = &dbf->bucket_cache[cache_0];
	  _gdbm_put_av_elem (old_bucket,
			     bucket[1]->bucket_avail,
			     &bucket[1]->av_count, FALSE);
	}
      else
	{
	  dbf->bucket = bucket[1];
	  dbf->cache_entry = &dbf->bucket_cache[cache_1];
	  _gdbm_put_av_elem (old_bucket,
			     bucket[0]->bucket_avail,
			     &bucket[0]->av_count, FALSE);
	}

    }

  /* Get rid of old directories. */
  for (index = 0; index < old_count; index++)
    _gdbm_free (dbf, old_adr[index], old_size[index]);
}


/* The only place where a bucket is written.  CA_ENTRY is the
   cache entry containing the bucket to be written. */

void
_gdbm_write_bucket (dbf, ca_entry)
     gdbm_file_info *dbf;
     cache_elem *ca_entry;
{
  int  num_bytes;	/* The return value for write. */
  off_t file_pos;	/* The return value for lseek. */

  file_pos = lseek (dbf->desc, ca_entry->ca_adr, L_SET);
  if (file_pos != ca_entry->ca_adr)
    _gdbm_fatal (dbf, "lseek error");
  num_bytes = write (dbf->desc, ca_entry->ca_bucket, dbf->header->bucket_size);
  if (num_bytes != dbf->header->bucket_size)
    _gdbm_fatal (dbf, "write error");
  /* The slot is clean again; reset the cached lookup data so stale
     find results are not reused for whatever bucket lands here next. */
  ca_entry->ca_changed = FALSE;
  ca_entry->ca_data.hash_val = -1;
  ca_entry->ca_data.elem_loc = -1;
}