1/* 2 * Database functions 3 * Copyright (C) Andrew Tridgell 1999 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms AND provided that this software or 8 * any derived work is only used as part of the PPP daemon (pppd) 9 * and related utilities. 10 * The name of the author may not be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 13 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 14 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 * 16 * Note: this software is also available under the Gnu Public License 17 * version 2 or later. 18 */ 19 20#include <stdlib.h> 21#include <stdio.h> 22#include <fcntl.h> 23#include <unistd.h> 24#include <string.h> 25#include <fcntl.h> 26#include <errno.h> 27#include <sys/mman.h> 28#include <sys/stat.h> 29#include "tdb.h" 30 31#define TDB_VERSION (0x26011967 + 1) 32#define TDB_MAGIC (0x26011999U) 33#define TDB_FREE_MAGIC (~TDB_MAGIC) 34#define TDB_ALIGN 4 35#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGN) 36#define DEFAULT_HASH_SIZE 128 37#define TDB_PAGE_SIZE 0x2000 38#define TDB_LEN_MULTIPLIER 10 39#define FREELIST_TOP (sizeof(struct tdb_header)) 40 41#define LOCK_SET 1 42#define LOCK_CLEAR 0 43 44/* lock offsets */ 45#define GLOBAL_LOCK 0 46#define ACTIVE_LOCK 4 47#define LIST_LOCK_BASE 1024 48 49#define BUCKET(hash) ((hash) % tdb->header.hash_size) 50 51#ifndef MAP_FILE 52#define MAP_FILE 0 53#endif 54 55/* the body of the database is made of one list_struct for the free space 56 plus a separate data list for each hash value */ 57struct list_struct { 58 tdb_len rec_len; /* total byte length of record */ 59 tdb_off next; /* offset of the next record in the list */ 60 tdb_len key_len; /* byte length of key */ 61 tdb_len data_len; /* byte length of data */ 62 unsigned full_hash; /* the full 32 bit hash of the key */ 63 unsigned magic; /* try to catch errors */ 64 /* 65 the following union is implied 66 union { 67 char record[rec_len]; 68 struct { 69 char key[key_len]; 70 char data[data_len]; 71 } 72 } 73 */ 74}; 75 76/* a null data record - useful for error returns */ 77static TDB_DATA null_data; 78 79/* a byte range locking function - return 0 on success 80 this functions locks/unlocks 1 byte at the specified offset */ 81static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, 82 int set, int rw_type, int lck_type) 83{ 84#if NOLOCK 85 return 0; 86#else 87 struct flock fl; 88 89 if (tdb->fd == -1) return 0; /* for in memory tdb */ 90 91 if (tdb->read_only) return -1; 92 93 fl.l_type = set==LOCK_SET?rw_type:F_UNLCK; 94 fl.l_whence = SEEK_SET; 95 fl.l_start = offset; 96 fl.l_len = 1; 97 fl.l_pid = 0; 98 99 if (fcntl(tdb->fd, lck_type, &fl) != 0) { 100#if TDB_DEBUG 101 if (lck_type == F_SETLKW) { 102 printf("lock %d failed at %d (%s)\n", 103 set, offset, strerror(errno)); 104 } 105#endif 106 tdb->ecode = TDB_ERR_LOCK; 107 return -1; 108 } 109 return 0; 110#endif 111} 112 113/* lock a list in the database. list -1 is the alloc list */ 114static int tdb_lock(TDB_CONTEXT *tdb, int list) 115{ 116 if (list < -1 || list >= (int)tdb->header.hash_size) { 117#if TDB_DEBUG 118 printf("bad list %d\n", list); 119#endif 120 return -1; 121 } 122 if (tdb->locked[list+1] == 0) { 123 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_SET, 124 F_WRLCK, F_SETLKW) != 0) { 125 return -1; 126 } 127 } 128 tdb->locked[list+1]++; 129 return 0; 130} 131 132/* unlock the database. */ 133static int tdb_unlock(TDB_CONTEXT *tdb, int list) 134{ 135 if (list < -1 || list >= (int)tdb->header.hash_size) { 136#if TDB_DEBUG 137 printf("bad unlock list %d\n", list); 138#endif 139 return -1; 140 } 141 142 if (tdb->locked[list+1] == 0) { 143#if TDB_DEBUG 144 printf("not locked %d\n", list); 145#endif 146 tdb->ecode = TDB_ERR_LOCK; 147 return -1; 148 } 149 if (tdb->locked[list+1] == 1) { 150 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_CLEAR, 151 F_WRLCK, F_SETLKW) != 0) { 152 return -1; 153 } 154 } 155 tdb->locked[list+1]--; 156 return 0; 157} 158 159/* the hash algorithm - turn a key into an integer 160 This is based on the hash agorithm from gdbm */ 161static unsigned tdb_hash(TDB_DATA *key) 162{ 163 unsigned value; /* Used to compute the hash value. */ 164 unsigned i; /* Used to cycle through random values. */ 165 166 /* Set the initial value from the key size. */ 167 value = 0x238F13AF * key->dsize; 168 for (i=0; i < key->dsize; i++) { 169 value = (value + (key->dptr[i] << (i*5 % 24))); 170 } 171 172 value = (1103515243 * value + 12345); 173 174 return value; 175} 176 177/* find the top of the hash chain for an open database */ 178static tdb_off tdb_hash_top(TDB_CONTEXT *tdb, unsigned hash) 179{ 180 tdb_off ret; 181 hash = BUCKET(hash); 182 ret = FREELIST_TOP + (hash+1)*sizeof(tdb_off); 183 return ret; 184} 185 186 187/* check for an out of bounds access - if it is out of bounds then 188 see if the database has been expanded by someone else and expand 189 if necessary */ 190static int tdb_oob(TDB_CONTEXT *tdb, tdb_off offset) 191{ 192 struct stat st; 193 if ((offset <= tdb->map_size) || (tdb->fd == -1)) return 0; 194 195 fstat(tdb->fd, &st); 196 if (st.st_size <= (ssize_t)offset) { 197 tdb->ecode = TDB_ERR_IO; 198 return -1; 199 } 200 201#if HAVE_MMAP 202 if (tdb->map_ptr) { 203 munmap(tdb->map_ptr, tdb->map_size); 204 tdb->map_ptr = NULL; 205 } 206#endif 207 208 tdb->map_size = st.st_size; 209#if HAVE_MMAP 210 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, 211 tdb->read_only?PROT_READ:PROT_READ|PROT_WRITE, 212 MAP_SHARED | MAP_FILE, tdb->fd, 0); 213#endif 214 return 0; 215} 216 217 218/* write a lump of data at a specified offset */ 219static int tdb_write(TDB_CONTEXT *tdb, tdb_off offset, const char *buf, tdb_len len) 220{ 221 if (tdb_oob(tdb, offset + len) != 0) { 222 /* oops - trying to write beyond the end of the database! */ 223 return -1; 224 } 225 226 if (tdb->map_ptr) { 227 memcpy(offset + (char *)tdb->map_ptr, buf, len); 228 } else { 229 if (lseek(tdb->fd, offset, SEEK_SET) != offset || 230 write(tdb->fd, buf, len) != (ssize_t)len) { 231 tdb->ecode = TDB_ERR_IO; 232 return -1; 233 } 234 } 235 return 0; 236} 237 238/* read a lump of data at a specified offset */ 239static int tdb_read(TDB_CONTEXT *tdb, tdb_off offset, char *buf, tdb_len len) 240{ 241 if (tdb_oob(tdb, offset + len) != 0) { 242 /* oops - trying to read beyond the end of the database! */ 243 return -1; 244 } 245 246 if (tdb->map_ptr) { 247 memcpy(buf, offset + (char *)tdb->map_ptr, len); 248 } else { 249 if (lseek(tdb->fd, offset, SEEK_SET) != offset || 250 read(tdb->fd, buf, len) != (ssize_t)len) { 251 tdb->ecode = TDB_ERR_IO; 252 return -1; 253 } 254 } 255 return 0; 256} 257 258 259/* read a lump of data, allocating the space for it */ 260static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len) 261{ 262 char *buf; 263 264 buf = (char *)malloc(len); 265 266 if (!buf) { 267 tdb->ecode = TDB_ERR_OOM; 268 return NULL; 269 } 270 271 if (tdb_read(tdb, offset, buf, len) == -1) { 272 free(buf); 273 return NULL; 274 } 275 276 return buf; 277} 278 279/* convenience routine for writing a record */ 280static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) 281{ 282 return tdb_write(tdb, offset, (char *)rec, sizeof(*rec)); 283} 284 285/* convenience routine for writing a tdb_off */ 286static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) 287{ 288 return tdb_write(tdb, offset, (char *)d, sizeof(*d)); 289} 290 291/* read a tdb_off from the store */ 292static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) 293{ 294 return tdb_read(tdb, offset, (char *)d, sizeof(*d)); 295} 296 297/* read a record and check for simple errors */ 298static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) 299{ 300 if (tdb_read(tdb, offset, (char *)rec, sizeof(*rec)) == -1) return -1; 301 if (rec->magic != TDB_MAGIC) { 302#if TDB_DEBUG 303 printf("bad magic 0x%08x at offset %d\n", 304 rec->magic, offset); 305#endif 306 tdb->ecode = TDB_ERR_CORRUPT; 307 return -1; 308 } 309 if (tdb_oob(tdb, rec->next) != 0) { 310 return -1; 311 } 312 return 0; 313} 314 315/* expand the database at least length bytes by expanding the 316 underlying file and doing the mmap again if necessary */ 317static int tdb_expand(TDB_CONTEXT *tdb, tdb_off length) 318{ 319 struct list_struct rec; 320 tdb_off offset, ptr; 321 char b = 0; 322 323 tdb_lock(tdb,-1); 324 325 /* make sure we know about any previous expansions by another 326 process */ 327 tdb_oob(tdb,tdb->map_size + 1); 328 329 /* always make room for at least 10 more records */ 330 length *= TDB_LEN_MULTIPLIER; 331 332 /* and round the database up to a multiple of TDB_PAGE_SIZE */ 333 length = ((tdb->map_size + length + TDB_PAGE_SIZE) & ~(TDB_PAGE_SIZE - 1)) - tdb->map_size; 334 335 /* expand the file itself */ 336 if (tdb->fd != -1) { 337 lseek(tdb->fd, tdb->map_size + length - 1, SEEK_SET); 338 if (write(tdb->fd, &b, 1) != 1) goto fail; 339 } 340 341 /* form a new freelist record */ 342 offset = FREELIST_TOP; 343 rec.rec_len = length - sizeof(rec); 344 rec.magic = TDB_FREE_MAGIC; 345 if (ofs_read(tdb, offset, &rec.next) == -1) { 346 goto fail; 347 } 348 349#if HAVE_MMAP 350 if (tdb->fd != -1 && tdb->map_ptr) { 351 munmap(tdb->map_ptr, tdb->map_size); 352 tdb->map_ptr = NULL; 353 } 354#endif 355 356 tdb->map_size += length; 357 358 if (tdb->fd == -1) { 359 tdb->map_ptr = realloc(tdb->map_ptr, tdb->map_size); 360 } 361 362 /* write it out */ 363 if (rec_write(tdb, tdb->map_size - length, &rec) == -1) { 364 goto fail; 365 } 366 367 /* link it into the free list */ 368 ptr = tdb->map_size - length; 369 if (ofs_write(tdb, offset, &ptr) == -1) goto fail; 370 371#if HAVE_MMAP 372 if (tdb->fd != -1) { 373 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, 374 PROT_READ|PROT_WRITE, 375 MAP_SHARED | MAP_FILE, tdb->fd, 0); 376 } 377#endif 378 379 tdb_unlock(tdb, -1); 380 return 0; 381 382 fail: 383 tdb_unlock(tdb,-1); 384 return -1; 385} 386 387/* allocate some space from the free list. The offset returned points 388 to a unconnected list_struct within the database with room for at 389 least length bytes of total data 390 391 0 is returned if the space could not be allocated 392 */ 393static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length) 394{ 395 tdb_off offset, rec_ptr, last_ptr; 396 struct list_struct rec, lastrec, newrec; 397 398 tdb_lock(tdb, -1); 399 400 again: 401 last_ptr = 0; 402 offset = FREELIST_TOP; 403 404 /* read in the freelist top */ 405 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 406 goto fail; 407 } 408 409 /* keep looking until we find a freelist record that is big 410 enough */ 411 while (rec_ptr) { 412 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { 413 goto fail; 414 } 415 416 if (rec.magic != TDB_FREE_MAGIC) { 417#if TDB_DEBUG 418 printf("bad magic 0x%08x in free list\n", rec.magic); 419#endif 420 goto fail; 421 } 422 423 if (rec.rec_len >= length) { 424 /* found it - now possibly split it up */ 425 if (rec.rec_len > length + MIN_REC_SIZE) { 426 length = (length + TDB_ALIGN) & ~(TDB_ALIGN-1); 427 428 newrec.rec_len = rec.rec_len - (sizeof(rec) + length); 429 newrec.next = rec.next; 430 newrec.magic = TDB_FREE_MAGIC; 431 432 rec.rec_len = length; 433 rec.next = rec_ptr + sizeof(rec) + rec.rec_len; 434 435 if (rec_write(tdb, rec.next, &newrec) == -1) { 436 goto fail; 437 } 438 439 if (rec_write(tdb, rec_ptr, &rec) == -1) { 440 goto fail; 441 } 442 } 443 444 /* remove it from the list */ 445 if (last_ptr == 0) { 446 offset = FREELIST_TOP; 447 448 if (ofs_write(tdb, offset, &rec.next) == -1) { 449 goto fail; 450 } 451 } else { 452 lastrec.next = rec.next; 453 if (rec_write(tdb, last_ptr, &lastrec) == -1) { 454 goto fail; 455 } 456 } 457 458 /* all done - return the new record offset */ 459 tdb_unlock(tdb, -1); 460 return rec_ptr; 461 } 462 463 /* move to the next record */ 464 lastrec = rec; 465 last_ptr = rec_ptr; 466 rec_ptr = rec.next; 467 } 468 469 /* we didn't find enough space. See if we can expand the 470 database and if we can then try again */ 471 if (tdb_expand(tdb, length + sizeof(rec)) == 0) goto again; 472 473 fail: 474#if TDB_DEBUG 475 printf("tdb_allocate failed for size %u\n", length); 476#endif 477 tdb_unlock(tdb, -1); 478 return 0; 479} 480 481/* initialise a new database with a specified hash size */ 482static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size) 483{ 484 struct tdb_header header; 485 tdb_off offset; 486 int i, size = 0; 487 tdb_off buf[16]; 488 489 /* create the header */ 490 memset(&header, 0, sizeof(header)); 491 memcpy(header.magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); 492 header.version = TDB_VERSION; 493 header.hash_size = hash_size; 494 lseek(tdb->fd, 0, SEEK_SET); 495 ftruncate(tdb->fd, 0); 496 497 if (tdb->fd != -1 && write(tdb->fd, &header, sizeof(header)) != 498 sizeof(header)) { 499 tdb->ecode = TDB_ERR_IO; 500 return -1; 501 } else size += sizeof(header); 502 503 /* the freelist and hash pointers */ 504 offset = 0; 505 memset(buf, 0, sizeof(buf)); 506 507 for (i=0;(hash_size+1)-i >= 16; i += 16) { 508 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(buf)) != 509 sizeof(buf)) { 510 tdb->ecode = TDB_ERR_IO; 511 return -1; 512 } else size += sizeof(buf); 513 } 514 515 for (;i<hash_size+1; i++) { 516 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(tdb_off)) != 517 sizeof(tdb_off)) { 518 tdb->ecode = TDB_ERR_IO; 519 return -1; 520 } else size += sizeof(tdb_off); 521 } 522 523 if (tdb->fd == -1) { 524 tdb->map_ptr = calloc(size, 1); 525 tdb->map_size = size; 526 if (tdb->map_ptr == NULL) { 527 tdb->ecode = TDB_ERR_IO; 528 return -1; 529 } 530 memcpy(&tdb->header, &header, sizeof(header)); 531 } 532 533#if TDB_DEBUG 534 printf("initialised database of hash_size %u\n", 535 hash_size); 536#endif 537 return 0; 538} 539 540/* Returns 0 on fail. On success, return offset of record, and fills 541 in rec */ 542static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, unsigned int hash, 543 struct list_struct *rec) 544{ 545 tdb_off offset, rec_ptr; 546 547 /* find the top of the hash chain */ 548 offset = tdb_hash_top(tdb, hash); 549 550 /* read in the hash top */ 551 if (ofs_read(tdb, offset, &rec_ptr) == -1) 552 return 0; 553 554 /* keep looking until we find the right record */ 555 while (rec_ptr) { 556 if (rec_read(tdb, rec_ptr, rec) == -1) 557 return 0; 558 559 if (hash == rec->full_hash && key.dsize == rec->key_len) { 560 char *k; 561 /* a very likely hit - read the key */ 562 k = tdb_alloc_read(tdb, rec_ptr + sizeof(*rec), 563 rec->key_len); 564 565 if (!k) 566 return 0; 567 568 if (memcmp(key.dptr, k, key.dsize) == 0) { 569 free(k); 570 return rec_ptr; 571 } 572 free(k); 573 } 574 575 /* move to the next record */ 576 rec_ptr = rec->next; 577 } 578 return 0; 579} 580 581/* 582 return an error string for the last tdb error 583*/ 584char *tdb_error(TDB_CONTEXT *tdb) 585{ 586 int i; 587 static struct { 588 enum TDB_ERROR ecode; 589 char *estring; 590 } emap[] = { 591 {TDB_SUCCESS, "Success"}, 592 {TDB_ERR_CORRUPT, "Corrupt database"}, 593 {TDB_ERR_IO, "IO Error"}, 594 {TDB_ERR_LOCK, "Locking error"}, 595 {TDB_ERR_OOM, "Out of memory"}, 596 {TDB_ERR_EXISTS, "Record exists"}, 597 {-1, NULL}}; 598 if (tdb != NULL) { 599 for (i=0;emap[i].estring;i++) { 600 if (tdb->ecode == emap[i].ecode) return emap[i].estring; 601 } 602 } else { 603 return "Invalid tdb context"; 604 } 605 return "Invalid error code"; 606} 607 608 609/* update an entry in place - this only works if the new data size 610 is <= the old data size and the key exists. 611 on failure return -1 612*/ 613int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf) 614{ 615 unsigned hash; 616 struct list_struct rec; 617 tdb_off rec_ptr; 618 int ret = -1; 619 620 if (tdb == NULL) { 621#ifdef TDB_DEBUG 622 printf("tdb_update() called with null context\n"); 623#endif 624 return -1; 625 } 626 627 /* find which hash bucket it is in */ 628 hash = tdb_hash(&key); 629 630 tdb_lock(tdb, BUCKET(hash)); 631 rec_ptr = tdb_find(tdb, key, hash, &rec); 632 633 if (!rec_ptr) 634 goto out; 635 636 /* must be long enough */ 637 if (rec.rec_len < key.dsize + dbuf.dsize) 638 goto out; 639 640 if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len, 641 dbuf.dptr, dbuf.dsize) == -1) 642 goto out; 643 644 if (dbuf.dsize != rec.data_len) { 645 /* update size */ 646 rec.data_len = dbuf.dsize; 647 ret = rec_write(tdb, rec_ptr, &rec); 648 } else 649 ret = 0; 650 651 out: 652 tdb_unlock(tdb, BUCKET(hash)); 653 return ret; 654} 655 656/* find an entry in the database given a key */ 657TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key) 658{ 659 unsigned hash; 660 tdb_off rec_ptr; 661 struct list_struct rec; 662 TDB_DATA ret = null_data; 663 664 if (tdb == NULL) { 665#ifdef TDB_DEBUG 666 printf("tdb_fetch() called with null context\n"); 667#endif 668 return null_data; 669 } 670 671 /* find which hash bucket it is in */ 672 hash = tdb_hash(&key); 673 674 tdb_lock(tdb, BUCKET(hash)); 675 rec_ptr = tdb_find(tdb, key, hash, &rec); 676 677 if (rec_ptr) { 678 ret.dptr = tdb_alloc_read(tdb, 679 rec_ptr + sizeof(rec) + rec.key_len, 680 rec.data_len); 681 ret.dsize = rec.data_len; 682 } 683 684 tdb_unlock(tdb, BUCKET(hash)); 685 return ret; 686} 687 688/* check if an entry in the database exists 689 690 note that 1 is returned if the key is found and 0 is returned if not found 691 this doesn't match the conventions in the rest of this module, but is 692 compatible with gdbm 693*/ 694int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key) 695{ 696 unsigned hash; 697 tdb_off rec_ptr; 698 struct list_struct rec; 699 700 if (tdb == NULL) { 701#ifdef TDB_DEBUG 702 printf("tdb_exists() called with null context\n"); 703#endif 704 return 0; 705 } 706 707 /* find which hash bucket it is in */ 708 hash = tdb_hash(&key); 709 710 tdb_lock(tdb, BUCKET(hash)); 711 rec_ptr = tdb_find(tdb, key, hash, &rec); 712 tdb_unlock(tdb, BUCKET(hash)); 713 714 return rec_ptr != 0; 715} 716 717/* traverse the entire database - calling fn(tdb, key, data) on each element. 718 return -1 on error or the record count traversed 719 if fn is NULL then it is not called 720 a non-zero return value from fn() indicates that the traversal should stop 721 */ 722int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void* state), void* state) 723{ 724 int count = 0; 725 unsigned h; 726 tdb_off offset, rec_ptr; 727 struct list_struct rec; 728 char *data; 729 TDB_DATA key, dbuf; 730 731 if (tdb == NULL) { 732#ifdef TDB_DEBUG 733 printf("tdb_traverse() called with null context\n"); 734#endif 735 return -1; 736 } 737 738 /* loop over all hash chains */ 739 for (h = 0; h < tdb->header.hash_size; h++) { 740 tdb_lock(tdb, BUCKET(h)); 741 742 /* read in the hash top */ 743 offset = tdb_hash_top(tdb, h); 744 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 745 goto fail; 746 } 747 748 /* traverse all records for this hash */ 749 while (rec_ptr) { 750 if (rec_read(tdb, rec_ptr, &rec) == -1) { 751 goto fail; 752 } 753 754 /* now read the full record */ 755 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), 756 rec.key_len + rec.data_len); 757 if (!data) { 758 goto fail; 759 } 760 761 key.dptr = data; 762 key.dsize = rec.key_len; 763 dbuf.dptr = data + rec.key_len; 764 dbuf.dsize = rec.data_len; 765 count++; 766 767 if (fn && fn(tdb, key, dbuf, state) != 0) { 768 /* they want us to stop traversing */ 769 free(data); 770 tdb_unlock(tdb, BUCKET(h)); 771 return count; 772 } 773 774 /* a miss - drat */ 775 free(data); 776 777 /* move to the next record */ 778 rec_ptr = rec.next; 779 } 780 tdb_unlock(tdb, BUCKET(h)); 781 } 782 783 /* return the number traversed */ 784 return count; 785 786 fail: 787 tdb_unlock(tdb, BUCKET(h)); 788 return -1; 789} 790 791 792/* find the first entry in the database and return its key */ 793TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb) 794{ 795 tdb_off offset, rec_ptr; 796 struct list_struct rec; 797 unsigned hash; 798 TDB_DATA ret; 799 800 if (tdb == NULL) { 801#ifdef TDB_DEBUG 802 printf("tdb_firstkey() called with null context\n"); 803#endif 804 return null_data; 805 } 806 807 /* look for a non-empty hash chain */ 808 for (hash = 0, rec_ptr = 0; 809 hash < tdb->header.hash_size; 810 hash++) { 811 /* find the top of the hash chain */ 812 offset = tdb_hash_top(tdb, hash); 813 814 tdb_lock(tdb, BUCKET(hash)); 815 816 /* read in the hash top */ 817 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 818 goto fail; 819 } 820 821 if (rec_ptr) break; 822 823 tdb_unlock(tdb, BUCKET(hash)); 824 } 825 826 if (rec_ptr == 0) return null_data; 827 828 /* we've found a non-empty chain, now read the record */ 829 if (rec_read(tdb, rec_ptr, &rec) == -1) { 830 goto fail; 831 } 832 833 /* allocate and read the key space */ 834 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); 835 ret.dsize = rec.key_len; 836 tdb_unlock(tdb, BUCKET(hash)); 837 return ret; 838 839 fail: 840 tdb_unlock(tdb, BUCKET(hash)); 841 return null_data; 842} 843 844/* find the next entry in the database, returning its key */ 845TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA key) 846{ 847 unsigned hash, hbucket; 848 tdb_off rec_ptr, offset; 849 struct list_struct rec; 850 TDB_DATA ret; 851 852 if (tdb == NULL) { 853#ifdef TDB_DEBUG 854 printf("tdb_nextkey() called with null context\n"); 855#endif 856 return null_data; 857 } 858 859 /* find which hash bucket it is in */ 860 hash = tdb_hash(&key); 861 hbucket = BUCKET(hash); 862 863 tdb_lock(tdb, hbucket); 864 rec_ptr = tdb_find(tdb, key, hash, &rec); 865 if (rec_ptr) { 866 /* we want the next record after this one */ 867 rec_ptr = rec.next; 868 } 869 870 /* not found or last in hash: look for next non-empty hash chain */ 871 while (rec_ptr == 0) { 872 tdb_unlock(tdb, hbucket); 873 874 if (++hbucket >= tdb->header.hash_size - 1) 875 return null_data; 876 877 offset = tdb_hash_top(tdb, hbucket); 878 tdb_lock(tdb, hbucket); 879 /* read in the hash top */ 880 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 881 tdb_unlock(tdb, hbucket); 882 return null_data; 883 } 884 } 885 886 /* Read the record. */ 887 if (rec_read(tdb, rec_ptr, &rec) == -1) { 888 tdb_unlock(tdb, hbucket); 889 return null_data; 890 } 891 /* allocate and read the key */ 892 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); 893 ret.dsize = rec.key_len; 894 tdb_unlock(tdb, hbucket); 895 896 return ret; 897} 898 899/* delete an entry in the database given a key */ 900int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key) 901{ 902 unsigned hash; 903 tdb_off offset, rec_ptr, last_ptr; 904 struct list_struct rec, lastrec; 905 char *data = NULL; 906 907 if (tdb == NULL) { 908#ifdef TDB_DEBUG 909 printf("tdb_delete() called with null context\n"); 910#endif 911 return -1; 912 } 913 914 /* find which hash bucket it is in */ 915 hash = tdb_hash(&key); 916 917 tdb_lock(tdb, BUCKET(hash)); 918 919 /* find the top of the hash chain */ 920 offset = tdb_hash_top(tdb, hash); 921 922 /* read in the hash top */ 923 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 924 goto fail; 925 } 926 927 last_ptr = 0; 928 929 /* keep looking until we find the right record */ 930 while (rec_ptr) { 931 if (rec_read(tdb, rec_ptr, &rec) == -1) { 932 goto fail; 933 } 934 935 if (hash == rec.full_hash && key.dsize == rec.key_len) { 936 /* a very likely hit - read the record and full key */ 937 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), 938 rec.key_len); 939 if (!data) { 940 goto fail; 941 } 942 943 if (memcmp(key.dptr, data, key.dsize) == 0) { 944 /* a definite match - delete it */ 945 if (last_ptr == 0) { 946 offset = tdb_hash_top(tdb, hash); 947 if (ofs_write(tdb, offset, &rec.next) == -1) { 948 goto fail; 949 } 950 } else { 951 lastrec.next = rec.next; 952 if (rec_write(tdb, last_ptr, &lastrec) == -1) { 953 goto fail; 954 } 955 } 956 tdb_unlock(tdb, BUCKET(hash)); 957 tdb_lock(tdb, -1); 958 /* and recover the space */ 959 offset = FREELIST_TOP; 960 if (ofs_read(tdb, offset, &rec.next) == -1) { 961 goto fail2; 962 } 963 rec.magic = TDB_FREE_MAGIC; 964 if (rec_write(tdb, rec_ptr, &rec) == -1) { 965 goto fail2; 966 } 967 if (ofs_write(tdb, offset, &rec_ptr) == -1) { 968 goto fail2; 969 } 970 971 /* yipee - all done */ 972 free(data); 973 tdb_unlock(tdb, -1); 974 return 0; 975 } 976 977 /* a miss - drat */ 978 free(data); 979 data = NULL; 980 } 981 982 /* move to the next record */ 983 last_ptr = rec_ptr; 984 lastrec = rec; 985 rec_ptr = rec.next; 986 } 987 988 fail: 989 if (data) free(data); 990 tdb_unlock(tdb, BUCKET(hash)); 991 return -1; 992 993 fail2: 994 if (data) free(data); 995 tdb_unlock(tdb, -1); 996 return -1; 997} 998 999 1000/* store an element in the database, replacing any existing element 1001 with the same key 1002 1003 return 0 on success, -1 on failure 1004*/ 1005int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) 1006{ 1007 struct list_struct rec; 1008 unsigned hash; 1009 tdb_off rec_ptr, offset; 1010 char *p = NULL; 1011 1012 if (tdb == NULL) { 1013#ifdef TDB_DEBUG 1014 printf("tdb_store() called with null context\n"); 1015#endif 1016 return -1; 1017 } 1018 1019 /* find which hash bucket it is in */ 1020 hash = tdb_hash(&key); 1021 1022 /* check for it existing */ 1023 if (flag == TDB_INSERT && tdb_exists(tdb, key)) { 1024 tdb->ecode = TDB_ERR_EXISTS; 1025 return -1; 1026 } 1027 1028 /* first try in-place update */ 1029 if (flag != TDB_INSERT && tdb_update(tdb, key, dbuf) == 0) { 1030 return 0; 1031 } 1032 1033 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize); 1034 if (rec_ptr == 0) { 1035 return -1; 1036 } 1037 1038 tdb_lock(tdb, BUCKET(hash)); 1039 1040 /* delete any existing record - if it doesn't exist we don't care */ 1041 if (flag != TDB_INSERT) { 1042 tdb_delete(tdb, key); 1043 } 1044 1045 /* read the newly created record */ 1046 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { 1047 goto fail; 1048 } 1049 1050 if (rec.magic != TDB_FREE_MAGIC) goto fail; 1051 1052 /* find the top of the hash chain */ 1053 offset = tdb_hash_top(tdb, hash); 1054 1055 /* read in the hash top diretcly into our next pointer */ 1056 if (ofs_read(tdb, offset, &rec.next) == -1) { 1057 goto fail; 1058 } 1059 1060 rec.key_len = key.dsize; 1061 rec.data_len = dbuf.dsize; 1062 rec.full_hash = hash; 1063 rec.magic = TDB_MAGIC; 1064 1065 p = (char *)malloc(sizeof(rec) + key.dsize + dbuf.dsize); 1066 if (!p) { 1067 tdb->ecode = TDB_ERR_OOM; 1068 goto fail; 1069 } 1070 1071 memcpy(p, &rec, sizeof(rec)); 1072 memcpy(p+sizeof(rec), key.dptr, key.dsize); 1073 memcpy(p+sizeof(rec)+key.dsize, dbuf.dptr, dbuf.dsize); 1074 1075 if (tdb_write(tdb, rec_ptr, p, sizeof(rec)+key.dsize+dbuf.dsize) == -1) 1076 goto fail; 1077 1078 free(p); 1079 p = NULL; 1080 1081 /* and point the top of the hash chain at it */ 1082 if (ofs_write(tdb, offset, &rec_ptr) == -1) goto fail; 1083 1084 tdb_unlock(tdb, BUCKET(hash)); 1085 return 0; 1086 1087 fail: 1088#if TDB_DEBUG 1089 printf("store failed for hash 0x%08x in bucket %u\n", hash, BUCKET(hash)); 1090#endif 1091 if (p) free(p); 1092 tdb_unlock(tdb, BUCKET(hash)); 1093 return -1; 1094} 1095 1096 1097/* open the database, creating it if necessary 1098 1099 The open_flags and mode are passed straight to the open call on the database 1100 file. A flags value of O_WRONLY is invalid 1101 1102 The hash size is advisory, use zero for a default value. 1103 1104 return is NULL on error 1105*/ 1106TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags, 1107 int open_flags, mode_t mode) 1108{ 1109 TDB_CONTEXT tdb, *ret; 1110 struct stat st; 1111 1112 memset(&tdb, 0, sizeof(tdb)); 1113 1114 tdb.fd = -1; 1115 tdb.name = NULL; 1116 tdb.map_ptr = NULL; 1117 1118 if ((open_flags & O_ACCMODE) == O_WRONLY) { 1119 goto fail; 1120 } 1121 1122 if (hash_size == 0) hash_size = DEFAULT_HASH_SIZE; 1123 1124 tdb.read_only = ((open_flags & O_ACCMODE) == O_RDONLY); 1125 1126 if (name != NULL) { 1127 tdb.fd = open(name, open_flags, mode); 1128 if (tdb.fd == -1) { 1129 goto fail; 1130 } 1131 } 1132 1133 /* ensure there is only one process initialising at once */ 1134 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_SET, F_WRLCK, F_SETLKW); 1135 1136 if (tdb_flags & TDB_CLEAR_IF_FIRST) { 1137 /* we need to zero the database if we are the only 1138 one with it open */ 1139 if (tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_WRLCK, F_SETLK) == 0) { 1140 ftruncate(tdb.fd, 0); 1141 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLK); 1142 } 1143 } 1144 1145 /* leave this lock in place */ 1146 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_RDLCK, F_SETLKW); 1147 1148 if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header) || 1149 strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0 || 1150 tdb.header.version != TDB_VERSION) { 1151 /* its not a valid database - possibly initialise it */ 1152 if (!(open_flags & O_CREAT)) { 1153 goto fail; 1154 } 1155 if (tdb_new_database(&tdb, hash_size) == -1) goto fail; 1156 1157 lseek(tdb.fd, 0, SEEK_SET); 1158 if (tdb.fd != -1 && read(tdb.fd, &tdb.header, 1159 sizeof(tdb.header)) != 1160 sizeof(tdb.header)) 1161 goto fail; 1162 } 1163 1164 if (tdb.fd != -1) { 1165 fstat(tdb.fd, &st); 1166 1167 /* map the database and fill in the return structure */ 1168 tdb.name = (char *)strdup(name); 1169 tdb.map_size = st.st_size; 1170 } 1171 1172 tdb.locked = (int *)calloc(tdb.header.hash_size+1, 1173 sizeof(tdb.locked[0])); 1174 if (!tdb.locked) { 1175 goto fail; 1176 } 1177 1178#if HAVE_MMAP 1179 if (tdb.fd != -1) { 1180 tdb.map_ptr = (void *)mmap(NULL, st.st_size, 1181 tdb.read_only? PROT_READ : PROT_READ|PROT_WRITE, 1182 MAP_SHARED | MAP_FILE, tdb.fd, 0); 1183 } 1184#endif 1185 1186 ret = (TDB_CONTEXT *)malloc(sizeof(tdb)); 1187 if (!ret) goto fail; 1188 1189 *ret = tdb; 1190 1191#if TDB_DEBUG 1192 printf("mapped database of hash_size %u map_size=%u\n", 1193 hash_size, tdb.map_size); 1194#endif 1195 1196 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLKW); 1197 return ret; 1198 1199 fail: 1200 if (tdb.name) free(tdb.name); 1201 if (tdb.fd != -1) close(tdb.fd); 1202 if (tdb.map_ptr) munmap(tdb.map_ptr, tdb.map_size); 1203 1204 return NULL; 1205} 1206 1207/* close a database */ 1208int tdb_close(TDB_CONTEXT *tdb) 1209{ 1210 if (!tdb) return -1; 1211 1212 if (tdb->name) free(tdb->name); 1213 if (tdb->fd != -1) close(tdb->fd); 1214 if (tdb->locked) free(tdb->locked); 1215 1216 if (tdb->map_ptr) { 1217 if (tdb->fd != -1) { 1218 munmap(tdb->map_ptr, tdb->map_size); 1219 } else { 1220 free(tdb->map_ptr); 1221 } 1222 } 1223 1224 memset(tdb, 0, sizeof(*tdb)); 1225 free(tdb); 1226 1227 return 0; 1228} 1229 1230/* lock the database. If we already have it locked then don't do anything */ 1231int tdb_writelock(TDB_CONTEXT *tdb) 1232{ 1233 if (tdb == NULL) { 1234#ifdef TDB_DEBUG 1235 printf("tdb_writelock() called with null context\n"); 1236#endif 1237 return -1; 1238 } 1239 1240 return tdb_lock(tdb, -1); 1241} 1242 1243/* unlock the database. */ 1244int tdb_writeunlock(TDB_CONTEXT *tdb) 1245{ 1246 if (tdb == NULL) { 1247#ifdef TDB_DEBUG 1248 printf("tdb_writeunlock() called with null context\n"); 1249#endif 1250 return -1; 1251 } 1252 1253 return tdb_unlock(tdb, -1); 1254} 1255 1256/* lock one hash chain. This is meant to be used to reduce locking 1257 contention - it cannot guarantee how many records will be locked */ 1258int tdb_lockchain(TDB_CONTEXT *tdb, TDB_DATA key) 1259{ 1260 if (tdb == NULL) { 1261#ifdef TDB_DEBUG 1262 printf("tdb_lockchain() called with null context\n"); 1263#endif 1264 return -1; 1265 } 1266 1267 return tdb_lock(tdb, BUCKET(tdb_hash(&key))); 1268} 1269 1270 1271/* unlock one hash chain */ 1272int tdb_unlockchain(TDB_CONTEXT *tdb, TDB_DATA key) 1273{ 1274 if (tdb == NULL) { 1275#ifdef TDB_DEBUG 1276 printf("tdb_unlockchain() called with null context\n"); 1277#endif 1278 return -1; 1279 } 1280 1281 return tdb_unlock(tdb, BUCKET(tdb_hash(&key))); 1282} 1283