1/* NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp */ 2 3/* 4 * Database functions 5 * Copyright (C) Andrew Tridgell 1999 6 * 7 * Redistribution and use in source and binary forms are permitted 8 * provided that the above copyright notice and this paragraph are 9 * duplicated in all such forms AND provided that this software or 10 * any derived work is only used as part of the PPP daemon (pppd) 11 * and related utilities. 12 * The name of the author may not be used to endorse or promote products 13 * derived from this software without specific prior written permission. 14 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 16 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 17 * 18 * Note: this software is also available under the Gnu Public License 19 * version 2 or later. 20 */ 21 22#include <sys/cdefs.h> 23#ifndef lint 24__RCSID("NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp "); 25#endif 26 27#include <stdlib.h> 28#include <stdio.h> 29#include <fcntl.h> 30#include <unistd.h> 31#include <string.h> 32#include <errno.h> 33#include <sys/mman.h> 34#include <sys/stat.h> 35#include "tdb.h" 36 37#define TDB_VERSION (0x26011967 + 1) 38#define TDB_MAGIC (0x26011999U) 39#define TDB_FREE_MAGIC (~TDB_MAGIC) 40#define TDB_ALIGN 4 41#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGN) 42#define DEFAULT_HASH_SIZE 128 43#define TDB_PAGE_SIZE 0x2000 44#define TDB_LEN_MULTIPLIER 10 45#define FREELIST_TOP (sizeof(struct tdb_header)) 46 47#define LOCK_SET 1 48#define LOCK_CLEAR 0 49 50/* lock offsets */ 51#define GLOBAL_LOCK 0 52#define ACTIVE_LOCK 4 53#define LIST_LOCK_BASE 1024 54 55#define BUCKET(hash) ((hash) % tdb->header.hash_size) 56 57#ifndef MAP_FILE 58#define MAP_FILE 0 59#endif 60 61/* the body of the database is made of one list_struct for the free space 62 plus a separate data list for each hash value */ 63struct list_struct { 64 tdb_len rec_len; /* total byte length of record */ 65 tdb_off next; /* offset of the next record in the list */ 66 tdb_len key_len; /* byte length of key */ 67 tdb_len data_len; /* byte length of data */ 68 unsigned full_hash; /* the full 32 bit hash of the key */ 69 unsigned magic; /* try to catch errors */ 70 /* 71 the following union is implied 72 union { 73 char record[rec_len]; 74 struct { 75 char key[key_len]; 76 char data[data_len]; 77 } 78 } 79 */ 80}; 81 82/* a null data record - useful for error returns */ 83static TDB_DATA null_data; 84 85static int tdb_update __P((TDB_CONTEXT *, TDB_DATA, TDB_DATA)); 86#ifdef notyet 87static int tdb_lockchain __P((TDB_CONTEXT *, TDB_DATA)); 88static int tdb_unlockchain __P((TDB_CONTEXT *, TDB_DATA)); 89#endif 90 91/* a byte range locking function - return 0 on success 92 this functions locks/unlocks 1 byte at the specified offset */ 93static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, 94 int set, int rw_type, int lck_type) 95{ 96#if NOLOCK 97 return 0; 98#else 99 struct flock fl; 100 101 if (tdb->fd == -1) return 0; /* for in memory tdb */ 102 103 if (tdb->read_only) return -1; 104 105 fl.l_type = set==LOCK_SET?rw_type:F_UNLCK; 106 fl.l_whence = SEEK_SET; 107 fl.l_start = offset; 108 fl.l_len = 1; 109 fl.l_pid = 0; 110 111 if (fcntl(tdb->fd, lck_type, &fl) != 0) { 112#if TDB_DEBUG 113 if (lck_type == F_SETLKW) { 114 printf("lock %d failed at %d (%s)\n", 115 set, offset, strerror(errno)); 116 } 117#endif 118 tdb->ecode = TDB_ERR_LOCK; 119 return -1; 120 } 121 return 0; 122#endif 123} 124 125/* lock a list in the database. list -1 is the alloc list */ 126static int tdb_lock(TDB_CONTEXT *tdb, int list) 127{ 128 if (list < -1 || list >= (int)tdb->header.hash_size) { 129#if TDB_DEBUG 130 printf("bad list %d\n", list); 131#endif 132 return -1; 133 } 134 if (tdb->locked[list+1] == 0) { 135 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_SET, 136 F_WRLCK, F_SETLKW) != 0) { 137 return -1; 138 } 139 } 140 tdb->locked[list+1]++; 141 return 0; 142} 143 144/* unlock the database. */ 145static int tdb_unlock(TDB_CONTEXT *tdb, int list) 146{ 147 if (list < -1 || list >= (int)tdb->header.hash_size) { 148#if TDB_DEBUG 149 printf("bad unlock list %d\n", list); 150#endif 151 return -1; 152 } 153 154 if (tdb->locked[list+1] == 0) { 155#if TDB_DEBUG 156 printf("not locked %d\n", list); 157#endif 158 tdb->ecode = TDB_ERR_LOCK; 159 return -1; 160 } 161 if (tdb->locked[list+1] == 1) { 162 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_CLEAR, 163 F_WRLCK, F_SETLKW) != 0) { 164 return -1; 165 } 166 } 167 tdb->locked[list+1]--; 168 return 0; 169} 170 171/* the hash algorithm - turn a key into an integer 172 This is based on the hash agorithm from gdbm */ 173static unsigned tdb_hash(TDB_DATA *key) 174{ 175 unsigned value; /* Used to compute the hash value. */ 176 unsigned i; /* Used to cycle through random values. */ 177 178 /* Set the initial value from the key size. */ 179 value = 0x238F13AF * key->dsize; 180 for (i=0; i < key->dsize; i++) { 181 value = (value + (key->dptr[i] << (i*5 % 24))); 182 } 183 184 value = (1103515243 * value + 12345); 185 186 return value; 187} 188 189/* find the top of the hash chain for an open database */ 190static tdb_off tdb_hash_top(TDB_CONTEXT *tdb, unsigned hash) 191{ 192 tdb_off ret; 193 hash = BUCKET(hash); 194 ret = FREELIST_TOP + (hash+1)*sizeof(tdb_off); 195 return ret; 196} 197 198 199/* check for an out of bounds access - if it is out of bounds then 200 see if the database has been expanded by someone else and expand 201 if necessary */ 202static int tdb_oob(TDB_CONTEXT *tdb, tdb_off offset) 203{ 204 struct stat st; 205 if ((offset <= tdb->map_size) || (tdb->fd == -1)) return 0; 206 207 fstat(tdb->fd, &st); 208 if (st.st_size <= (ssize_t)offset) { 209 tdb->ecode = TDB_ERR_IO; 210 return -1; 211 } 212 213#if HAVE_MMAP 214 if (tdb->map_ptr) { 215 munmap(tdb->map_ptr, tdb->map_size); 216 tdb->map_ptr = NULL; 217 } 218#endif 219 220 tdb->map_size = st.st_size; 221#if HAVE_MMAP 222 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, 223 tdb->read_only?PROT_READ:PROT_READ|PROT_WRITE, 224 MAP_SHARED | MAP_FILE, tdb->fd, 0); 225 if (tdb->map_ptr == MAP_FAILED) 226 tdb->map_ptr = NULL; 227#endif 228 return 0; 229} 230 231 232/* write a lump of data at a specified offset */ 233static int tdb_write(TDB_CONTEXT *tdb, tdb_off offset, const char *buf, tdb_len len) 234{ 235 if (tdb_oob(tdb, offset + len) != 0) { 236 /* oops - trying to write beyond the end of the database! */ 237 return -1; 238 } 239 240 if (tdb->map_ptr) { 241 memcpy(offset + (char *)tdb->map_ptr, buf, len); 242 } else { 243 if (lseek(tdb->fd, offset, SEEK_SET) != offset || 244 write(tdb->fd, buf, len) != (ssize_t)len) { 245 tdb->ecode = TDB_ERR_IO; 246 return -1; 247 } 248 } 249 return 0; 250} 251 252/* read a lump of data at a specified offset */ 253static int tdb_read(TDB_CONTEXT *tdb, tdb_off offset, char *buf, tdb_len len) 254{ 255 if (tdb_oob(tdb, offset + len) != 0) { 256 /* oops - trying to read beyond the end of the database! */ 257 return -1; 258 } 259 260 if (tdb->map_ptr) { 261 memcpy(buf, offset + (char *)tdb->map_ptr, len); 262 } else { 263 if (lseek(tdb->fd, offset, SEEK_SET) != offset || 264 read(tdb->fd, buf, len) != (ssize_t)len) { 265 tdb->ecode = TDB_ERR_IO; 266 return -1; 267 } 268 } 269 return 0; 270} 271 272 273/* read a lump of data, allocating the space for it */ 274static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len) 275{ 276 char *buf; 277 278 buf = (char *)malloc(len); 279 280 if (!buf) { 281 tdb->ecode = TDB_ERR_OOM; 282 return NULL; 283 } 284 285 if (tdb_read(tdb, offset, buf, len) == -1) { 286 free(buf); 287 return NULL; 288 } 289 290 return buf; 291} 292 293/* convenience routine for writing a record */ 294static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) 295{ 296 return tdb_write(tdb, offset, (char *)rec, sizeof(*rec)); 297} 298 299/* convenience routine for writing a tdb_off */ 300static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) 301{ 302 return tdb_write(tdb, offset, (char *)d, sizeof(*d)); 303} 304 305/* read a tdb_off from the store */ 306static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) 307{ 308 return tdb_read(tdb, offset, (char *)d, sizeof(*d)); 309} 310 311/* read a record and check for simple errors */ 312static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) 313{ 314 if (tdb_read(tdb, offset, (char *)rec, sizeof(*rec)) == -1) return -1; 315 if (rec->magic != TDB_MAGIC) { 316#if TDB_DEBUG 317 printf("bad magic 0x%08x at offset %d\n", 318 rec->magic, offset); 319#endif 320 tdb->ecode = TDB_ERR_CORRUPT; 321 return -1; 322 } 323 if (tdb_oob(tdb, rec->next) != 0) { 324 return -1; 325 } 326 return 0; 327} 328 329/* expand the database at least length bytes by expanding the 330 underlying file and doing the mmap again if necessary */ 331static int tdb_expand(TDB_CONTEXT *tdb, tdb_off length) 332{ 333 struct list_struct rec; 334 tdb_off offset, ptr; 335 char b = 0; 336 337 tdb_lock(tdb,-1); 338 339 /* make sure we know about any previous expansions by another 340 process */ 341 tdb_oob(tdb,tdb->map_size + 1); 342 343 /* always make room for at least 10 more records */ 344 length *= TDB_LEN_MULTIPLIER; 345 346 /* and round the database up to a multiple of TDB_PAGE_SIZE */ 347 length = ((tdb->map_size + length + TDB_PAGE_SIZE) & ~(TDB_PAGE_SIZE - 1)) - tdb->map_size; 348 349 /* expand the file itself */ 350 if (tdb->fd != -1) { 351 lseek(tdb->fd, tdb->map_size + length - 1, SEEK_SET); 352 if (write(tdb->fd, &b, 1) != 1) goto fail; 353 } 354 355 /* form a new freelist record */ 356 offset = FREELIST_TOP; 357 rec.rec_len = length - sizeof(rec); 358 rec.magic = TDB_FREE_MAGIC; 359 if (ofs_read(tdb, offset, &rec.next) == -1) { 360 goto fail; 361 } 362 363#if HAVE_MMAP 364 if (tdb->fd != -1 && tdb->map_ptr) { 365 munmap(tdb->map_ptr, tdb->map_size); 366 tdb->map_ptr = NULL; 367 } 368#endif 369 370 tdb->map_size += length; 371 372 if (tdb->fd == -1) { 373 void *n; 374 n = realloc(tdb->map_ptr, tdb->map_size); 375 if (!n) 376 goto fail; 377 tdb->map_ptr = n; 378 } 379 380 /* write it out */ 381 if (rec_write(tdb, tdb->map_size - length, &rec) == -1) { 382 goto fail; 383 } 384 385 /* link it into the free list */ 386 ptr = tdb->map_size - length; 387 if (ofs_write(tdb, offset, &ptr) == -1) goto fail; 388 389#if HAVE_MMAP 390 if (tdb->fd != -1) { 391 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, 392 PROT_READ|PROT_WRITE, 393 MAP_SHARED | MAP_FILE, tdb->fd, 0); 394 if (tdb->map_ptr == MAP_FAILED) 395 tdb->map_ptr = NULL; 396 } 397#endif 398 399 tdb_unlock(tdb, -1); 400 return 0; 401 402 fail: 403 tdb_unlock(tdb,-1); 404 return -1; 405} 406 407/* allocate some space from the free list. The offset returned points 408 to a unconnected list_struct within the database with room for at 409 least length bytes of total data 410 411 0 is returned if the space could not be allocated 412 */ 413static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length) 414{ 415 tdb_off offset, rec_ptr, last_ptr; 416 struct list_struct rec, lastrec, newrec; 417 418 tdb_lock(tdb, -1); 419 420 again: 421 last_ptr = 0; 422 offset = FREELIST_TOP; 423 424 /* read in the freelist top */ 425 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 426 goto fail; 427 } 428 429 /* keep looking until we find a freelist record that is big 430 enough */ 431 while (rec_ptr) { 432 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { 433 goto fail; 434 } 435 436 if (rec.magic != TDB_FREE_MAGIC) { 437#if TDB_DEBUG 438 printf("bad magic 0x%08x in free list\n", rec.magic); 439#endif 440 goto fail; 441 } 442 443 if (rec.rec_len >= length) { 444 /* found it - now possibly split it up */ 445 if (rec.rec_len > length + MIN_REC_SIZE) { 446 length = (length + TDB_ALIGN) & ~(TDB_ALIGN-1); 447 448 newrec.rec_len = rec.rec_len - (sizeof(rec) + length); 449 newrec.next = rec.next; 450 newrec.magic = TDB_FREE_MAGIC; 451 452 rec.rec_len = length; 453 rec.next = rec_ptr + sizeof(rec) + rec.rec_len; 454 455 if (rec_write(tdb, rec.next, &newrec) == -1) { 456 goto fail; 457 } 458 459 if (rec_write(tdb, rec_ptr, &rec) == -1) { 460 goto fail; 461 } 462 } 463 464 /* remove it from the list */ 465 if (last_ptr == 0) { 466 offset = FREELIST_TOP; 467 468 if (ofs_write(tdb, offset, &rec.next) == -1) { 469 goto fail; 470 } 471 } else { 472 lastrec.next = rec.next; 473 if (rec_write(tdb, last_ptr, &lastrec) == -1) { 474 goto fail; 475 } 476 } 477 478 /* all done - return the new record offset */ 479 tdb_unlock(tdb, -1); 480 return rec_ptr; 481 } 482 483 /* move to the next record */ 484 lastrec = rec; 485 last_ptr = rec_ptr; 486 rec_ptr = rec.next; 487 } 488 489 /* we didn't find enough space. See if we can expand the 490 database and if we can then try again */ 491 if (tdb_expand(tdb, length + sizeof(rec)) == 0) goto again; 492 493 fail: 494#if TDB_DEBUG 495 printf("tdb_allocate failed for size %u\n", length); 496#endif 497 tdb_unlock(tdb, -1); 498 return 0; 499} 500 501/* initialise a new database with a specified hash size */ 502static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size) 503{ 504 struct tdb_header header; 505 int i, size = 0; 506 tdb_off buf[16]; 507 508 /* create the header */ 509 memset(&header, 0, sizeof(header)); 510 memcpy(header.magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); 511 header.version = TDB_VERSION; 512 header.hash_size = hash_size; 513 lseek(tdb->fd, 0, SEEK_SET); 514 ftruncate(tdb->fd, 0); 515 516 if (tdb->fd != -1 && write(tdb->fd, &header, sizeof(header)) != 517 sizeof(header)) { 518 tdb->ecode = TDB_ERR_IO; 519 return -1; 520 } else size += sizeof(header); 521 522 /* the freelist and hash pointers */ 523 memset(buf, 0, sizeof(buf)); 524 525 for (i=0;(hash_size+1)-i >= 16; i += 16) { 526 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(buf)) != 527 sizeof(buf)) { 528 tdb->ecode = TDB_ERR_IO; 529 return -1; 530 } else size += sizeof(buf); 531 } 532 533 for (;i<hash_size+1; i++) { 534 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(tdb_off)) != 535 sizeof(tdb_off)) { 536 tdb->ecode = TDB_ERR_IO; 537 return -1; 538 } else size += sizeof(tdb_off); 539 } 540 541 if (tdb->fd == -1) { 542 tdb->map_ptr = calloc(size, 1); 543 tdb->map_size = size; 544 if (tdb->map_ptr == NULL) { 545 tdb->ecode = TDB_ERR_IO; 546 return -1; 547 } 548 memcpy(&tdb->header, &header, sizeof(header)); 549 } 550 551#if TDB_DEBUG 552 printf("initialised database of hash_size %u\n", 553 hash_size); 554#endif 555 return 0; 556} 557 558/* Returns 0 on fail. On success, return offset of record, and fills 559 in rec */ 560static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, unsigned int hash, 561 struct list_struct *rec) 562{ 563 tdb_off offset, rec_ptr; 564 565 /* find the top of the hash chain */ 566 offset = tdb_hash_top(tdb, hash); 567 568 /* read in the hash top */ 569 if (ofs_read(tdb, offset, &rec_ptr) == -1) 570 return 0; 571 572 /* keep looking until we find the right record */ 573 while (rec_ptr) { 574 if (rec_read(tdb, rec_ptr, rec) == -1) 575 return 0; 576 577 if (hash == rec->full_hash && key.dsize == rec->key_len) { 578 char *k; 579 /* a very likely hit - read the key */ 580 k = tdb_alloc_read(tdb, rec_ptr + sizeof(*rec), 581 rec->key_len); 582 583 if (!k) 584 return 0; 585 586 if (memcmp(key.dptr, k, key.dsize) == 0) { 587 free(k); 588 return rec_ptr; 589 } 590 free(k); 591 } 592 593 /* move to the next record */ 594 rec_ptr = rec->next; 595 } 596 return 0; 597} 598 599/* 600 return an error string for the last tdb error 601*/ 602char *tdb_errorstr(TDB_CONTEXT *tdb) 603{ 604 int i; 605 static struct { 606 enum TDB_ERROR ecode; 607 char *estring; 608 } emap[] = { 609 {TDB_SUCCESS, "Success"}, 610 {TDB_ERR_CORRUPT, "Corrupt database"}, 611 {TDB_ERR_IO, "IO Error"}, 612 {TDB_ERR_LOCK, "Locking error"}, 613 {TDB_ERR_OOM, "Out of memory"}, 614 {TDB_ERR_EXISTS, "Record exists"}, 615 {-1, NULL}}; 616 if (tdb != NULL) { 617 for (i=0;emap[i].estring;i++) { 618 if (tdb->ecode == emap[i].ecode) return emap[i].estring; 619 } 620 } else { 621 return "Invalid tdb context"; 622 } 623 return "Invalid error code"; 624} 625 626 627/* update an entry in place - this only works if the new data size 628 is <= the old data size and the key exists. 629 on failure return -1 630*/ 631static int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf) 632{ 633 unsigned hash; 634 struct list_struct rec; 635 tdb_off rec_ptr; 636 int ret = -1; 637 638 if (tdb == NULL) { 639#ifdef TDB_DEBUG 640 printf("tdb_update() called with null context\n"); 641#endif 642 return -1; 643 } 644 645 /* find which hash bucket it is in */ 646 hash = tdb_hash(&key); 647 648 tdb_lock(tdb, BUCKET(hash)); 649 rec_ptr = tdb_find(tdb, key, hash, &rec); 650 651 if (!rec_ptr) 652 goto out; 653 654 /* must be long enough */ 655 if (rec.rec_len < key.dsize + dbuf.dsize) 656 goto out; 657 658 if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len, 659 dbuf.dptr, dbuf.dsize) == -1) 660 goto out; 661 662 if (dbuf.dsize != rec.data_len) { 663 /* update size */ 664 rec.data_len = dbuf.dsize; 665 ret = rec_write(tdb, rec_ptr, &rec); 666 } else 667 ret = 0; 668 669 out: 670 tdb_unlock(tdb, BUCKET(hash)); 671 return ret; 672} 673 674/* find an entry in the database given a key */ 675TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key) 676{ 677 unsigned hash; 678 tdb_off rec_ptr; 679 struct list_struct rec; 680 TDB_DATA ret = null_data; 681 682 if (tdb == NULL) { 683#ifdef TDB_DEBUG 684 printf("tdb_fetch() called with null context\n"); 685#endif 686 return null_data; 687 } 688 689 /* find which hash bucket it is in */ 690 hash = tdb_hash(&key); 691 692 tdb_lock(tdb, BUCKET(hash)); 693 rec_ptr = tdb_find(tdb, key, hash, &rec); 694 695 if (rec_ptr) { 696 ret.dptr = tdb_alloc_read(tdb, 697 rec_ptr + sizeof(rec) + rec.key_len, 698 rec.data_len); 699 ret.dsize = rec.data_len; 700 } 701 702 tdb_unlock(tdb, BUCKET(hash)); 703 return ret; 704} 705 706/* check if an entry in the database exists 707 708 note that 1 is returned if the key is found and 0 is returned if not found 709 this doesn't match the conventions in the rest of this module, but is 710 compatible with gdbm 711*/ 712int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key) 713{ 714 unsigned hash; 715 tdb_off rec_ptr; 716 struct list_struct rec; 717 718 if (tdb == NULL) { 719#ifdef TDB_DEBUG 720 printf("tdb_exists() called with null context\n"); 721#endif 722 return 0; 723 } 724 725 /* find which hash bucket it is in */ 726 hash = tdb_hash(&key); 727 728 tdb_lock(tdb, BUCKET(hash)); 729 rec_ptr = tdb_find(tdb, key, hash, &rec); 730 tdb_unlock(tdb, BUCKET(hash)); 731 732 return rec_ptr != 0; 733} 734 735/* traverse the entire database - calling fn(tdb, key, data) on each element. 736 return -1 on error or the record count traversed 737 if fn is NULL then it is not called 738 a non-zero return value from fn() indicates that the traversal should stop 739 */ 740int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void* state), void* state) 741{ 742 int count = 0; 743 unsigned h; 744 tdb_off offset, rec_ptr; 745 struct list_struct rec; 746 char *data; 747 TDB_DATA key, dbuf; 748 749 if (tdb == NULL) { 750#ifdef TDB_DEBUG 751 printf("tdb_traverse() called with null context\n"); 752#endif 753 return -1; 754 } 755 756 /* loop over all hash chains */ 757 for (h = 0; h < tdb->header.hash_size; h++) { 758 tdb_lock(tdb, BUCKET(h)); 759 760 /* read in the hash top */ 761 offset = tdb_hash_top(tdb, h); 762 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 763 goto fail; 764 } 765 766 /* traverse all records for this hash */ 767 while (rec_ptr) { 768 if (rec_read(tdb, rec_ptr, &rec) == -1) { 769 goto fail; 770 } 771 772 /* now read the full record */ 773 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), 774 rec.key_len + rec.data_len); 775 if (!data) { 776 goto fail; 777 } 778 779 key.dptr = data; 780 key.dsize = rec.key_len; 781 dbuf.dptr = data + rec.key_len; 782 dbuf.dsize = rec.data_len; 783 count++; 784 785 if (fn && fn(tdb, key, dbuf, state) != 0) { 786 /* they want us to stop traversing */ 787 free(data); 788 tdb_unlock(tdb, BUCKET(h)); 789 return count; 790 } 791 792 /* a miss - drat */ 793 free(data); 794 795 /* move to the next record */ 796 rec_ptr = rec.next; 797 } 798 tdb_unlock(tdb, BUCKET(h)); 799 } 800 801 /* return the number traversed */ 802 return count; 803 804 fail: 805 tdb_unlock(tdb, BUCKET(h)); 806 return -1; 807} 808 809 810/* find the first entry in the database and return its key */ 811TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb) 812{ 813 tdb_off offset, rec_ptr; 814 struct list_struct rec; 815 unsigned hash; 816 TDB_DATA ret; 817 818 if (tdb == NULL) { 819#ifdef TDB_DEBUG 820 printf("tdb_firstkey() called with null context\n"); 821#endif 822 return null_data; 823 } 824 825 /* look for a non-empty hash chain */ 826 for (hash = 0, rec_ptr = 0; 827 hash < tdb->header.hash_size; 828 hash++) { 829 /* find the top of the hash chain */ 830 offset = tdb_hash_top(tdb, hash); 831 832 tdb_lock(tdb, BUCKET(hash)); 833 834 /* read in the hash top */ 835 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 836 goto fail; 837 } 838 839 if (rec_ptr) break; 840 841 tdb_unlock(tdb, BUCKET(hash)); 842 } 843 844 if (rec_ptr == 0) return null_data; 845 846 /* we've found a non-empty chain, now read the record */ 847 if (rec_read(tdb, rec_ptr, &rec) == -1) { 848 goto fail; 849 } 850 851 /* allocate and read the key space */ 852 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); 853 ret.dsize = rec.key_len; 854 tdb_unlock(tdb, BUCKET(hash)); 855 return ret; 856 857 fail: 858 tdb_unlock(tdb, BUCKET(hash)); 859 return null_data; 860} 861 862/* find the next entry in the database, returning its key */ 863TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA key) 864{ 865 unsigned hash, hbucket; 866 tdb_off rec_ptr, offset; 867 struct list_struct rec; 868 TDB_DATA ret; 869 870 if (tdb == NULL) { 871#ifdef TDB_DEBUG 872 printf("tdb_nextkey() called with null context\n"); 873#endif 874 return null_data; 875 } 876 877 /* find which hash bucket it is in */ 878 hash = tdb_hash(&key); 879 hbucket = BUCKET(hash); 880 881 tdb_lock(tdb, hbucket); 882 rec_ptr = tdb_find(tdb, key, hash, &rec); 883 if (rec_ptr) { 884 /* we want the next record after this one */ 885 rec_ptr = rec.next; 886 } 887 888 /* not found or last in hash: look for next non-empty hash chain */ 889 while (rec_ptr == 0) { 890 tdb_unlock(tdb, hbucket); 891 892 if (++hbucket >= tdb->header.hash_size - 1) 893 return null_data; 894 895 offset = tdb_hash_top(tdb, hbucket); 896 tdb_lock(tdb, hbucket); 897 /* read in the hash top */ 898 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 899 tdb_unlock(tdb, hbucket); 900 return null_data; 901 } 902 } 903 904 /* Read the record. */ 905 if (rec_read(tdb, rec_ptr, &rec) == -1) { 906 tdb_unlock(tdb, hbucket); 907 return null_data; 908 } 909 /* allocate and read the key */ 910 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); 911 ret.dsize = rec.key_len; 912 tdb_unlock(tdb, hbucket); 913 914 return ret; 915} 916 917/* delete an entry in the database given a key */ 918int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key) 919{ 920 unsigned hash; 921 tdb_off offset, rec_ptr, last_ptr; 922 struct list_struct rec, lastrec; 923 char *data = NULL; 924 925 if (tdb == NULL) { 926#ifdef TDB_DEBUG 927 printf("tdb_delete() called with null context\n"); 928#endif 929 return -1; 930 } 931 932 /* find which hash bucket it is in */ 933 hash = tdb_hash(&key); 934 935 tdb_lock(tdb, BUCKET(hash)); 936 937 /* find the top of the hash chain */ 938 offset = tdb_hash_top(tdb, hash); 939 940 /* read in the hash top */ 941 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 942 goto fail; 943 } 944 945 last_ptr = 0; 946 947 /* keep looking until we find the right record */ 948 while (rec_ptr) { 949 if (rec_read(tdb, rec_ptr, &rec) == -1) { 950 goto fail; 951 } 952 953 if (hash == rec.full_hash && key.dsize == rec.key_len) { 954 /* a very likely hit - read the record and full key */ 955 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), 956 rec.key_len); 957 if (!data) { 958 goto fail; 959 } 960 961 if (memcmp(key.dptr, data, key.dsize) == 0) { 962 /* a definite match - delete it */ 963 if (last_ptr == 0) { 964 offset = tdb_hash_top(tdb, hash); 965 if (ofs_write(tdb, offset, &rec.next) == -1) { 966 goto fail; 967 } 968 } else { 969 lastrec.next = rec.next; 970 if (rec_write(tdb, last_ptr, &lastrec) == -1) { 971 goto fail; 972 } 973 } 974 tdb_unlock(tdb, BUCKET(hash)); 975 tdb_lock(tdb, -1); 976 /* and recover the space */ 977 offset = FREELIST_TOP; 978 if (ofs_read(tdb, offset, &rec.next) == -1) { 979 goto fail2; 980 } 981 rec.magic = TDB_FREE_MAGIC; 982 if (rec_write(tdb, rec_ptr, &rec) == -1) { 983 goto fail2; 984 } 985 if (ofs_write(tdb, offset, &rec_ptr) == -1) { 986 goto fail2; 987 } 988 989 /* yipee - all done */ 990 free(data); 991 tdb_unlock(tdb, -1); 992 return 0; 993 } 994 995 /* a miss - drat */ 996 free(data); 997 data = NULL; 998 } 999 1000 /* move to the next record */ 1001 last_ptr = rec_ptr; 1002 lastrec = rec; 1003 rec_ptr = rec.next; 1004 } 1005 1006 fail: 1007 if (data) free(data); 1008 tdb_unlock(tdb, BUCKET(hash)); 1009 return -1; 1010 1011 fail2: 1012 if (data) free(data); 1013 tdb_unlock(tdb, -1); 1014 return -1; 1015} 1016 1017 1018/* store an element in the database, replacing any existing element 1019 with the same key 1020 1021 return 0 on success, -1 on failure 1022*/ 1023int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) 1024{ 1025 struct list_struct rec; 1026 unsigned hash; 1027 tdb_off rec_ptr, offset; 1028 char *p = NULL; 1029 1030 if (tdb == NULL) { 1031#ifdef TDB_DEBUG 1032 printf("tdb_store() called with null context\n"); 1033#endif 1034 return -1; 1035 } 1036 1037 /* find which hash bucket it is in */ 1038 hash = tdb_hash(&key); 1039 1040 /* check for it existing */ 1041 if (flag == TDB_INSERT && tdb_exists(tdb, key)) { 1042 tdb->ecode = TDB_ERR_EXISTS; 1043 return -1; 1044 } 1045 1046 /* first try in-place update */ 1047 if (flag != TDB_INSERT && tdb_update(tdb, key, dbuf) == 0) { 1048 return 0; 1049 } 1050 1051 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize); 1052 if (rec_ptr == 0) { 1053 return -1; 1054 } 1055 1056 tdb_lock(tdb, BUCKET(hash)); 1057 1058 /* delete any existing record - if it doesn't exist we don't care */ 1059 if (flag != TDB_INSERT) { 1060 tdb_delete(tdb, key); 1061 } 1062 1063 /* read the newly created record */ 1064 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { 1065 goto fail; 1066 } 1067 1068 if (rec.magic != TDB_FREE_MAGIC) goto fail; 1069 1070 /* find the top of the hash chain */ 1071 offset = tdb_hash_top(tdb, hash); 1072 1073 /* read in the hash top diretcly into our next pointer */ 1074 if (ofs_read(tdb, offset, &rec.next) == -1) { 1075 goto fail; 1076 } 1077 1078 rec.key_len = key.dsize; 1079 rec.data_len = dbuf.dsize; 1080 rec.full_hash = hash; 1081 rec.magic = TDB_MAGIC; 1082 1083 p = (char *)malloc(sizeof(rec) + key.dsize + dbuf.dsize); 1084 if (!p) { 1085 tdb->ecode = TDB_ERR_OOM; 1086 goto fail; 1087 } 1088 1089 memcpy(p, &rec, sizeof(rec)); 1090 memcpy(p+sizeof(rec), key.dptr, key.dsize); 1091 memcpy(p+sizeof(rec)+key.dsize, dbuf.dptr, dbuf.dsize); 1092 1093 if (tdb_write(tdb, rec_ptr, p, sizeof(rec)+key.dsize+dbuf.dsize) == -1) 1094 goto fail; 1095 1096 free(p); 1097 p = NULL; 1098 1099 /* and point the top of the hash chain at it */ 1100 if (ofs_write(tdb, offset, &rec_ptr) == -1) goto fail; 1101 1102 tdb_unlock(tdb, BUCKET(hash)); 1103 return 0; 1104 1105 fail: 1106#if TDB_DEBUG 1107 printf("store failed for hash 0x%08x in bucket %u\n", hash, BUCKET(hash)); 1108#endif 1109 if (p) free(p); 1110 tdb_unlock(tdb, BUCKET(hash)); 1111 return -1; 1112} 1113 1114 1115/* open the database, creating it if necessary 1116 1117 The open_flags and mode are passed straight to the open call on the database 1118 file. A flags value of O_WRONLY is invalid 1119 1120 The hash size is advisory, use zero for a default value. 1121 1122 return is NULL on error 1123*/ 1124TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags, 1125 int open_flags, mode_t mode) 1126{ 1127 TDB_CONTEXT tdb, *ret; 1128 struct stat st; 1129 1130 memset(&tdb, 0, sizeof(tdb)); 1131 1132 tdb.fd = -1; 1133 tdb.name = NULL; 1134 tdb.map_ptr = NULL; 1135 1136 if ((open_flags & O_ACCMODE) == O_WRONLY) { 1137 goto fail; 1138 } 1139 1140 if (hash_size == 0) hash_size = DEFAULT_HASH_SIZE; 1141 1142 tdb.read_only = ((open_flags & O_ACCMODE) == O_RDONLY); 1143 1144 if (name != NULL) { 1145 tdb.fd = open(name, open_flags, mode); 1146 if (tdb.fd == -1) { 1147 goto fail; 1148 } 1149 } 1150 1151 /* ensure there is only one process initialising at once */ 1152 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_SET, F_WRLCK, F_SETLKW); 1153 1154 if (tdb_flags & TDB_CLEAR_IF_FIRST) { 1155 /* we need to zero the database if we are the only 1156 one with it open */ 1157 if (tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_WRLCK, F_SETLK) == 0) { 1158 ftruncate(tdb.fd, 0); 1159 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLK); 1160 } 1161 } 1162 1163 /* leave this lock in place */ 1164 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_RDLCK, F_SETLKW); 1165 1166 if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header) || 1167 strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0 || 1168 tdb.header.version != TDB_VERSION) { 1169 /* its not a valid database - possibly initialise it */ 1170 if (!(open_flags & O_CREAT)) { 1171 goto fail; 1172 } 1173 if (tdb_new_database(&tdb, hash_size) == -1) goto fail; 1174 1175 lseek(tdb.fd, 0, SEEK_SET); 1176 if (tdb.fd != -1 && read(tdb.fd, &tdb.header, 1177 sizeof(tdb.header)) != 1178 sizeof(tdb.header)) 1179 goto fail; 1180 } 1181 1182 if (tdb.fd != -1) { 1183 fstat(tdb.fd, &st); 1184 1185 /* map the database and fill in the return structure */ 1186 tdb.name = name ? strdup(name) : NULL; 1187 tdb.map_size = st.st_size; 1188 } 1189 1190 tdb.locked = (int *)calloc(tdb.header.hash_size+1, 1191 sizeof(tdb.locked[0])); 1192 if (!tdb.locked) { 1193 goto fail; 1194 } 1195 1196#if HAVE_MMAP 1197 if (tdb.fd != -1) { 1198 tdb.map_ptr = (void *)mmap(NULL, st.st_size, 1199 tdb.read_only? PROT_READ : PROT_READ|PROT_WRITE, 1200 MAP_SHARED | MAP_FILE, tdb.fd, 0); 1201 if (tdb->map_ptr == MAP_FAILED) 1202 tdb->map_ptr = NULL; 1203 } 1204#endif 1205 1206 ret = (TDB_CONTEXT *)malloc(sizeof(tdb)); 1207 if (!ret) goto fail; 1208 1209 *ret = tdb; 1210 1211#if TDB_DEBUG 1212 printf("mapped database of hash_size %u map_size=%u\n", 1213 hash_size, tdb.map_size); 1214#endif 1215 1216 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLKW); 1217 return ret; 1218 1219 fail: 1220 if (tdb.name) free(tdb.name); 1221 if (tdb.fd != -1) close(tdb.fd); 1222 if (tdb.map_ptr) munmap(tdb.map_ptr, tdb.map_size); 1223 1224 return NULL; 1225} 1226 1227/* close a database */ 1228int tdb_close(TDB_CONTEXT *tdb) 1229{ 1230 if (!tdb) return -1; 1231 1232 if (tdb->name) free(tdb->name); 1233 if (tdb->fd != -1) close(tdb->fd); 1234 if (tdb->locked) free(tdb->locked); 1235 1236 if (tdb->map_ptr) { 1237 if (tdb->fd != -1) { 1238 munmap(tdb->map_ptr, tdb->map_size); 1239 } else { 1240 free(tdb->map_ptr); 1241 } 1242 } 1243 1244 memset(tdb, 0, sizeof(*tdb)); 1245 free(tdb); 1246 1247 return 0; 1248} 1249 1250/* lock the database. If we already have it locked then don't do anything */ 1251int tdb_writelock(TDB_CONTEXT *tdb) 1252{ 1253 if (tdb == NULL) { 1254#ifdef TDB_DEBUG 1255 printf("tdb_writelock() called with null context\n"); 1256#endif 1257 return -1; 1258 } 1259 1260 return tdb_lock(tdb, -1); 1261} 1262 1263/* unlock the database. */ 1264int tdb_writeunlock(TDB_CONTEXT *tdb) 1265{ 1266 if (tdb == NULL) { 1267#ifdef TDB_DEBUG 1268 printf("tdb_writeunlock() called with null context\n"); 1269#endif 1270 return -1; 1271 } 1272 1273 return tdb_unlock(tdb, -1); 1274} 1275 1276int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key) 1277{ 1278 if (tdb == NULL) { 1279#ifdef TDB_DEBUG 1280 printf("tdb_lockchain() called with null context\n"); 1281#endif 1282 return -1; 1283 } 1284 1285 return tdb_lock(tdb, BUCKET(tdb_hash(&key))); 1286} 1287 1288 1289int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key) 1290{ 1291 if (tdb == NULL) { 1292#ifdef TDB_DEBUG 1293 printf("tdb_unlockchain() called with null context\n"); 1294#endif 1295 return -1; 1296 } 1297 1298 return tdb_unlock(tdb, BUCKET(tdb_hash(&key))); 1299} 1300