1/* 2 * Database functions 3 * Copyright (C) Andrew Tridgell 1999 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms AND provided that this software or 8 * any derived work is only used as part of the PPP daemon (pppd) 9 * and related utilities. 10 * The name of the author may not be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 13 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 14 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 * 16 * Note: this software is also available under the Gnu Public License 17 * version 2 or later. 18 */ 19 20#include <stdlib.h> 21#include <stdio.h> 22#include <fcntl.h> 23#include <unistd.h> 24#include <string.h> 25#include <fcntl.h> 26#include <errno.h> 27#include <sys/mman.h> 28#include <sys/stat.h> 29#include "tdb.h" 30 31#define TDB_VERSION (0x26011967 + 1) 32#define TDB_MAGIC (0x26011999U) 33#define TDB_FREE_MAGIC (~TDB_MAGIC) 34#define TDB_ALIGN 4 35#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGN) 36#define DEFAULT_HASH_SIZE 128 37#define TDB_PAGE_SIZE 0x2000 38#define TDB_LEN_MULTIPLIER 10 39#define FREELIST_TOP (sizeof(struct tdb_header)) 40 41#define LOCK_SET 1 42#define LOCK_CLEAR 0 43 44/* lock offsets */ 45#define GLOBAL_LOCK 0 46#define ACTIVE_LOCK 4 47#define LIST_LOCK_BASE 1024 48 49#define BUCKET(hash) ((hash) % tdb->header.hash_size) 50 51#ifndef MAP_FILE 52#define MAP_FILE 0 53#endif 54 55/* the body of the database is made of one list_struct for the free space 56 plus a separate data list for each hash value */ 57struct list_struct { 58 tdb_len rec_len; /* total byte length of record */ 59 tdb_off next; /* offset of the next record in the list */ 60 tdb_len key_len; /* byte length of key */ 61 tdb_len data_len; /* byte length of data */ 62 unsigned full_hash; /* the full 32 bit hash of the key */ 63 unsigned magic; /* try to catch errors */ 64 /* 65 the following union is implied 66 union { 67 char record[rec_len]; 68 struct { 69 char key[key_len]; 70 char data[data_len]; 71 } 72 } 73 */ 74}; 75 76/* a null data record - useful for error returns */ 77static TDB_DATA null_data; 78 79/* a byte range locking function - return 0 on success 80 this functions locks/unlocks 1 byte at the specified offset */ 81static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, 82 int set, int rw_type, int lck_type) 83{ 84#if NOLOCK 85 return 0; 86#else 87 struct flock fl; 88 89 if (tdb->fd == -1) return 0; /* for in memory tdb */ 90 91 if (tdb->read_only) return -1; 92 93 fl.l_type = set==LOCK_SET?rw_type:F_UNLCK; 94 fl.l_whence = SEEK_SET; 95 fl.l_start = offset; 96 fl.l_len = 1; 97 fl.l_pid = 0; 98 99 if (fcntl(tdb->fd, lck_type, &fl) != 0) { 100#if TDB_DEBUG 101 if (lck_type == F_SETLKW) { 102 printf("lock %d failed at %d (%s)\n", 103 set, offset, strerror(errno)); 104 } 105#endif 106 tdb->ecode = TDB_ERR_LOCK; 107 return -1; 108 } 109 return 0; 110#endif 111} 112 113/* lock a list in the database. list -1 is the alloc list */ 114static int tdb_lock(TDB_CONTEXT *tdb, int list) 115{ 116 if (list < -1 || list >= (int)tdb->header.hash_size) { 117#if TDB_DEBUG 118 printf("bad list %d\n", list); 119#endif 120 return -1; 121 } 122 if (tdb->locked[list+1] == 0) { 123 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_SET, 124 F_WRLCK, F_SETLKW) != 0) { 125 return -1; 126 } 127 } 128 tdb->locked[list+1]++; 129 return 0; 130} 131 132/* unlock the database. */ 133static int tdb_unlock(TDB_CONTEXT *tdb, int list) 134{ 135 if (list < -1 || list >= (int)tdb->header.hash_size) { 136#if TDB_DEBUG 137 printf("bad unlock list %d\n", list); 138#endif 139 return -1; 140 } 141 142 if (tdb->locked[list+1] == 0) { 143#if TDB_DEBUG 144 printf("not locked %d\n", list); 145#endif 146 tdb->ecode = TDB_ERR_LOCK; 147 return -1; 148 } 149 if (tdb->locked[list+1] == 1) { 150 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_CLEAR, 151 F_WRLCK, F_SETLKW) != 0) { 152 return -1; 153 } 154 } 155 tdb->locked[list+1]--; 156 return 0; 157} 158 159/* the hash algorithm - turn a key into an integer 160 This is based on the hash agorithm from gdbm */ 161static unsigned tdb_hash(TDB_DATA *key) 162{ 163 unsigned value; /* Used to compute the hash value. */ 164 unsigned i; /* Used to cycle through random values. */ 165 166 /* Set the initial value from the key size. */ 167 value = 0x238F13AF * key->dsize; 168 for (i=0; i < key->dsize; i++) { 169 value = (value + (key->dptr[i] << (i*5 % 24))); 170 } 171 172 value = (1103515243 * value + 12345); 173 174 return value; 175} 176 177/* find the top of the hash chain for an open database */ 178static tdb_off tdb_hash_top(TDB_CONTEXT *tdb, unsigned hash) 179{ 180 tdb_off ret; 181 hash = BUCKET(hash); 182 ret = FREELIST_TOP + (hash+1)*sizeof(tdb_off); 183 return ret; 184} 185 186 187/* check for an out of bounds access - if it is out of bounds then 188 see if the database has been expanded by someone else and expand 189 if necessary */ 190static int tdb_oob(TDB_CONTEXT *tdb, tdb_off offset) 191{ 192 struct stat st; 193 if ((offset <= tdb->map_size) || (tdb->fd == -1)) return 0; 194 195 fstat(tdb->fd, &st); 196 if (st.st_size <= (ssize_t)offset) { 197 tdb->ecode = TDB_ERR_IO; 198 return -1; 199 } 200 201#if HAVE_MMAP 202 if (tdb->map_ptr) { 203 munmap(tdb->map_ptr, tdb->map_size); 204 tdb->map_ptr = NULL; 205 } 206#endif 207 208 tdb->map_size = st.st_size; 209#if HAVE_MMAP 210 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, 211 tdb->read_only?PROT_READ:PROT_READ|PROT_WRITE, 212 MAP_SHARED | MAP_FILE, tdb->fd, 0); 213 if (tdb->map_ptr == MAP_FAILED) { 214 tdb->map_ptr = NULL; 215 } 216#endif 217 return 0; 218} 219 220 221/* write a lump of data at a specified offset */ 222static int tdb_write(TDB_CONTEXT *tdb, tdb_off offset, const char *buf, tdb_len len) 223{ 224 if (tdb_oob(tdb, offset + len) != 0) { 225 /* oops - trying to write beyond the end of the database! */ 226 return -1; 227 } 228 229 if (tdb->map_ptr) { 230 memcpy(offset + (char *)tdb->map_ptr, buf, len); 231 } else { 232 if (lseek(tdb->fd, offset, SEEK_SET) != offset || 233 write(tdb->fd, buf, len) != (ssize_t)len) { 234 tdb->ecode = TDB_ERR_IO; 235 return -1; 236 } 237 } 238 return 0; 239} 240 241/* read a lump of data at a specified offset */ 242static int tdb_read(TDB_CONTEXT *tdb, tdb_off offset, char *buf, tdb_len len) 243{ 244 if (tdb_oob(tdb, offset + len) != 0) { 245 /* oops - trying to read beyond the end of the database! */ 246 return -1; 247 } 248 249 if (tdb->map_ptr) { 250 memcpy(buf, offset + (char *)tdb->map_ptr, len); 251 } else { 252 if (lseek(tdb->fd, offset, SEEK_SET) != offset || 253 read(tdb->fd, buf, len) != (ssize_t)len) { 254 tdb->ecode = TDB_ERR_IO; 255 return -1; 256 } 257 } 258 return 0; 259} 260 261 262/* read a lump of data, allocating the space for it */ 263static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len) 264{ 265 char *buf; 266 267 buf = (char *)malloc(len); 268 269 if (!buf) { 270 tdb->ecode = TDB_ERR_OOM; 271 return NULL; 272 } 273 274 if (tdb_read(tdb, offset, buf, len) == -1) { 275 free(buf); 276 return NULL; 277 } 278 279 return buf; 280} 281 282/* convenience routine for writing a record */ 283static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) 284{ 285 return tdb_write(tdb, offset, (char *)rec, sizeof(*rec)); 286} 287 288/* convenience routine for writing a tdb_off */ 289static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) 290{ 291 return tdb_write(tdb, offset, (char *)d, sizeof(*d)); 292} 293 294/* read a tdb_off from the store */ 295static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) 296{ 297 return tdb_read(tdb, offset, (char *)d, sizeof(*d)); 298} 299 300/* read a record and check for simple errors */ 301static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) 302{ 303 if (tdb_read(tdb, offset, (char *)rec, sizeof(*rec)) == -1) return -1; 304 if (rec->magic != TDB_MAGIC) { 305#if TDB_DEBUG 306 printf("bad magic 0x%08x at offset %d\n", 307 rec->magic, offset); 308#endif 309 tdb->ecode = TDB_ERR_CORRUPT; 310 return -1; 311 } 312 if (tdb_oob(tdb, rec->next) != 0) { 313 return -1; 314 } 315 return 0; 316} 317 318/* expand the database at least length bytes by expanding the 319 underlying file and doing the mmap again if necessary */ 320static int tdb_expand(TDB_CONTEXT *tdb, tdb_off length) 321{ 322 struct list_struct rec; 323 tdb_off offset, ptr; 324 char b = 0; 325 326 tdb_lock(tdb,-1); 327 328 /* make sure we know about any previous expansions by another 329 process */ 330 tdb_oob(tdb,tdb->map_size + 1); 331 332 /* always make room for at least 10 more records */ 333 length *= TDB_LEN_MULTIPLIER; 334 335 /* and round the database up to a multiple of TDB_PAGE_SIZE */ 336 length = ((tdb->map_size + length + TDB_PAGE_SIZE) & ~(TDB_PAGE_SIZE - 1)) - tdb->map_size; 337 338 /* expand the file itself */ 339 if (tdb->fd != -1) { 340 lseek(tdb->fd, tdb->map_size + length - 1, SEEK_SET); 341 if (write(tdb->fd, &b, 1) != 1) goto fail; 342 } 343 344 /* form a new freelist record */ 345 offset = FREELIST_TOP; 346 rec.rec_len = length - sizeof(rec); 347 rec.magic = TDB_FREE_MAGIC; 348 if (ofs_read(tdb, offset, &rec.next) == -1) { 349 goto fail; 350 } 351 352#if HAVE_MMAP 353 if (tdb->fd != -1 && tdb->map_ptr) { 354 munmap(tdb->map_ptr, tdb->map_size); 355 tdb->map_ptr = NULL; 356 } 357#endif 358 359 tdb->map_size += length; 360 361 if (tdb->fd == -1) { 362 tdb->map_ptr = realloc(tdb->map_ptr, tdb->map_size); 363 } 364 365 /* write it out */ 366 if (rec_write(tdb, tdb->map_size - length, &rec) == -1) { 367 goto fail; 368 } 369 370 /* link it into the free list */ 371 ptr = tdb->map_size - length; 372 if (ofs_write(tdb, offset, &ptr) == -1) goto fail; 373 374#if HAVE_MMAP 375 if (tdb->fd != -1) { 376 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, 377 PROT_READ|PROT_WRITE, 378 MAP_SHARED | MAP_FILE, tdb->fd, 0); 379 if (tdb->map_ptr == MAP_FAILED) { 380 tdb->map_ptr = NULL; 381 } 382 } 383#endif 384 385 tdb_unlock(tdb, -1); 386 return 0; 387 388 fail: 389 tdb_unlock(tdb,-1); 390 return -1; 391} 392 393/* allocate some space from the free list. The offset returned points 394 to a unconnected list_struct within the database with room for at 395 least length bytes of total data 396 397 0 is returned if the space could not be allocated 398 */ 399static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length) 400{ 401 tdb_off offset, rec_ptr, last_ptr; 402 struct list_struct rec, lastrec, newrec; 403 404 tdb_lock(tdb, -1); 405 406 again: 407 last_ptr = 0; 408 offset = FREELIST_TOP; 409 410 /* read in the freelist top */ 411 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 412 goto fail; 413 } 414 415 /* keep looking until we find a freelist record that is big 416 enough */ 417 while (rec_ptr) { 418 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { 419 goto fail; 420 } 421 422 if (rec.magic != TDB_FREE_MAGIC) { 423#if TDB_DEBUG 424 printf("bad magic 0x%08x in free list\n", rec.magic); 425#endif 426 goto fail; 427 } 428 429 if (rec.rec_len >= length) { 430 /* found it - now possibly split it up */ 431 if (rec.rec_len > length + MIN_REC_SIZE) { 432 length = (length + TDB_ALIGN) & ~(TDB_ALIGN-1); 433 434 newrec.rec_len = rec.rec_len - (sizeof(rec) + length); 435 newrec.next = rec.next; 436 newrec.magic = TDB_FREE_MAGIC; 437 438 rec.rec_len = length; 439 rec.next = rec_ptr + sizeof(rec) + rec.rec_len; 440 441 if (rec_write(tdb, rec.next, &newrec) == -1) { 442 goto fail; 443 } 444 445 if (rec_write(tdb, rec_ptr, &rec) == -1) { 446 goto fail; 447 } 448 } 449 450 /* remove it from the list */ 451 if (last_ptr == 0) { 452 offset = FREELIST_TOP; 453 454 if (ofs_write(tdb, offset, &rec.next) == -1) { 455 goto fail; 456 } 457 } else { 458 lastrec.next = rec.next; 459 if (rec_write(tdb, last_ptr, &lastrec) == -1) { 460 goto fail; 461 } 462 } 463 464 /* all done - return the new record offset */ 465 tdb_unlock(tdb, -1); 466 return rec_ptr; 467 } 468 469 /* move to the next record */ 470 lastrec = rec; 471 last_ptr = rec_ptr; 472 rec_ptr = rec.next; 473 } 474 475 /* we didn't find enough space. See if we can expand the 476 database and if we can then try again */ 477 if (tdb_expand(tdb, length + sizeof(rec)) == 0) goto again; 478 479 fail: 480#if TDB_DEBUG 481 printf("tdb_allocate failed for size %u\n", length); 482#endif 483 tdb_unlock(tdb, -1); 484 return 0; 485} 486 487/* initialise a new database with a specified hash size */ 488static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size) 489{ 490 struct tdb_header header; 491 tdb_off offset; 492 int i, size = 0; 493 tdb_off buf[16]; 494 495 /* create the header */ 496 memset(&header, 0, sizeof(header)); 497 memcpy(header.magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); 498 header.version = TDB_VERSION; 499 header.hash_size = hash_size; 500 lseek(tdb->fd, 0, SEEK_SET); 501 ftruncate(tdb->fd, 0); 502 503 if (tdb->fd != -1 && write(tdb->fd, &header, sizeof(header)) != 504 sizeof(header)) { 505 tdb->ecode = TDB_ERR_IO; 506 return -1; 507 } else size += sizeof(header); 508 509 /* the freelist and hash pointers */ 510 offset = 0; 511 memset(buf, 0, sizeof(buf)); 512 513 for (i=0;(hash_size+1)-i >= 16; i += 16) { 514 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(buf)) != 515 sizeof(buf)) { 516 tdb->ecode = TDB_ERR_IO; 517 return -1; 518 } else size += sizeof(buf); 519 } 520 521 for (;i<hash_size+1; i++) { 522 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(tdb_off)) != 523 sizeof(tdb_off)) { 524 tdb->ecode = TDB_ERR_IO; 525 return -1; 526 } else size += sizeof(tdb_off); 527 } 528 529 if (tdb->fd == -1) { 530 tdb->map_ptr = calloc(size, 1); 531 tdb->map_size = size; 532 if (tdb->map_ptr == NULL) { 533 tdb->ecode = TDB_ERR_IO; 534 return -1; 535 } 536 memcpy(&tdb->header, &header, sizeof(header)); 537 } 538 539#if TDB_DEBUG 540 printf("initialised database of hash_size %u\n", 541 hash_size); 542#endif 543 return 0; 544} 545 546/* Returns 0 on fail. On success, return offset of record, and fills 547 in rec */ 548static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, unsigned int hash, 549 struct list_struct *rec) 550{ 551 tdb_off offset, rec_ptr; 552 553 /* find the top of the hash chain */ 554 offset = tdb_hash_top(tdb, hash); 555 556 /* read in the hash top */ 557 if (ofs_read(tdb, offset, &rec_ptr) == -1) 558 return 0; 559 560 /* keep looking until we find the right record */ 561 while (rec_ptr) { 562 if (rec_read(tdb, rec_ptr, rec) == -1) 563 return 0; 564 565 if (hash == rec->full_hash && key.dsize == rec->key_len) { 566 char *k; 567 /* a very likely hit - read the key */ 568 k = tdb_alloc_read(tdb, rec_ptr + sizeof(*rec), 569 rec->key_len); 570 571 if (!k) 572 return 0; 573 574 if (memcmp(key.dptr, k, key.dsize) == 0) { 575 free(k); 576 return rec_ptr; 577 } 578 free(k); 579 } 580 581 /* move to the next record */ 582 rec_ptr = rec->next; 583 } 584 return 0; 585} 586 587/* 588 return an error string for the last tdb error 589*/ 590char *tdb_error(TDB_CONTEXT *tdb) 591{ 592 int i; 593 static struct { 594 enum TDB_ERROR ecode; 595 char *estring; 596 } emap[] = { 597 {TDB_SUCCESS, "Success"}, 598 {TDB_ERR_CORRUPT, "Corrupt database"}, 599 {TDB_ERR_IO, "IO Error"}, 600 {TDB_ERR_LOCK, "Locking error"}, 601 {TDB_ERR_OOM, "Out of memory"}, 602 {TDB_ERR_EXISTS, "Record exists"}, 603 {-1, NULL}}; 604 if (tdb != NULL) { 605 for (i=0;emap[i].estring;i++) { 606 if (tdb->ecode == emap[i].ecode) return emap[i].estring; 607 } 608 } else { 609 return "Invalid tdb context"; 610 } 611 return "Invalid error code"; 612} 613 614 615/* update an entry in place - this only works if the new data size 616 is <= the old data size and the key exists. 617 on failure return -1 618*/ 619int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf) 620{ 621 unsigned hash; 622 struct list_struct rec; 623 tdb_off rec_ptr; 624 int ret = -1; 625 626 if (tdb == NULL) { 627#ifdef TDB_DEBUG 628 printf("tdb_update() called with null context\n"); 629#endif 630 return -1; 631 } 632 633 /* find which hash bucket it is in */ 634 hash = tdb_hash(&key); 635 636 tdb_lock(tdb, BUCKET(hash)); 637 rec_ptr = tdb_find(tdb, key, hash, &rec); 638 639 if (!rec_ptr) 640 goto out; 641 642 /* must be long enough */ 643 if (rec.rec_len < key.dsize + dbuf.dsize) 644 goto out; 645 646 if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len, 647 dbuf.dptr, dbuf.dsize) == -1) 648 goto out; 649 650 if (dbuf.dsize != rec.data_len) { 651 /* update size */ 652 rec.data_len = dbuf.dsize; 653 ret = rec_write(tdb, rec_ptr, &rec); 654 } else 655 ret = 0; 656 657 out: 658 tdb_unlock(tdb, BUCKET(hash)); 659 return ret; 660} 661 662/* find an entry in the database given a key */ 663TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key) 664{ 665 unsigned hash; 666 tdb_off rec_ptr; 667 struct list_struct rec; 668 TDB_DATA ret = null_data; 669 670 if (tdb == NULL) { 671#ifdef TDB_DEBUG 672 printf("tdb_fetch() called with null context\n"); 673#endif 674 return null_data; 675 } 676 677 /* find which hash bucket it is in */ 678 hash = tdb_hash(&key); 679 680 tdb_lock(tdb, BUCKET(hash)); 681 rec_ptr = tdb_find(tdb, key, hash, &rec); 682 683 if (rec_ptr) { 684 ret.dptr = tdb_alloc_read(tdb, 685 rec_ptr + sizeof(rec) + rec.key_len, 686 rec.data_len); 687 ret.dsize = rec.data_len; 688 } 689 690 tdb_unlock(tdb, BUCKET(hash)); 691 return ret; 692} 693 694/* check if an entry in the database exists 695 696 note that 1 is returned if the key is found and 0 is returned if not found 697 this doesn't match the conventions in the rest of this module, but is 698 compatible with gdbm 699*/ 700int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key) 701{ 702 unsigned hash; 703 tdb_off rec_ptr; 704 struct list_struct rec; 705 706 if (tdb == NULL) { 707#ifdef TDB_DEBUG 708 printf("tdb_exists() called with null context\n"); 709#endif 710 return 0; 711 } 712 713 /* find which hash bucket it is in */ 714 hash = tdb_hash(&key); 715 716 tdb_lock(tdb, BUCKET(hash)); 717 rec_ptr = tdb_find(tdb, key, hash, &rec); 718 tdb_unlock(tdb, BUCKET(hash)); 719 720 return rec_ptr != 0; 721} 722 723/* traverse the entire database - calling fn(tdb, key, data) on each element. 724 return -1 on error or the record count traversed 725 if fn is NULL then it is not called 726 a non-zero return value from fn() indicates that the traversal should stop 727 */ 728int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void* state), void* state) 729{ 730 int count = 0; 731 unsigned h; 732 tdb_off offset, rec_ptr; 733 struct list_struct rec; 734 char *data; 735 TDB_DATA key, dbuf; 736 737 if (tdb == NULL) { 738#ifdef TDB_DEBUG 739 printf("tdb_traverse() called with null context\n"); 740#endif 741 return -1; 742 } 743 744 /* loop over all hash chains */ 745 for (h = 0; h < tdb->header.hash_size; h++) { 746 tdb_lock(tdb, BUCKET(h)); 747 748 /* read in the hash top */ 749 offset = tdb_hash_top(tdb, h); 750 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 751 goto fail; 752 } 753 754 /* traverse all records for this hash */ 755 while (rec_ptr) { 756 if (rec_read(tdb, rec_ptr, &rec) == -1) { 757 goto fail; 758 } 759 760 /* now read the full record */ 761 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), 762 rec.key_len + rec.data_len); 763 if (!data) { 764 goto fail; 765 } 766 767 key.dptr = data; 768 key.dsize = rec.key_len; 769 dbuf.dptr = data + rec.key_len; 770 dbuf.dsize = rec.data_len; 771 count++; 772 773 if (fn && fn(tdb, key, dbuf, state) != 0) { 774 /* they want us to stop traversing */ 775 free(data); 776 tdb_unlock(tdb, BUCKET(h)); 777 return count; 778 } 779 780 /* a miss - drat */ 781 free(data); 782 783 /* move to the next record */ 784 rec_ptr = rec.next; 785 } 786 tdb_unlock(tdb, BUCKET(h)); 787 } 788 789 /* return the number traversed */ 790 return count; 791 792 fail: 793 tdb_unlock(tdb, BUCKET(h)); 794 return -1; 795} 796 797 798/* find the first entry in the database and return its key */ 799TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb) 800{ 801 tdb_off offset, rec_ptr; 802 struct list_struct rec; 803 unsigned hash; 804 TDB_DATA ret; 805 806 if (tdb == NULL) { 807#ifdef TDB_DEBUG 808 printf("tdb_firstkey() called with null context\n"); 809#endif 810 return null_data; 811 } 812 813 /* look for a non-empty hash chain */ 814 for (hash = 0, rec_ptr = 0; 815 hash < tdb->header.hash_size; 816 hash++) { 817 /* find the top of the hash chain */ 818 offset = tdb_hash_top(tdb, hash); 819 820 tdb_lock(tdb, BUCKET(hash)); 821 822 /* read in the hash top */ 823 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 824 goto fail; 825 } 826 827 if (rec_ptr) break; 828 829 tdb_unlock(tdb, BUCKET(hash)); 830 } 831 832 if (rec_ptr == 0) return null_data; 833 834 /* we've found a non-empty chain, now read the record */ 835 if (rec_read(tdb, rec_ptr, &rec) == -1) { 836 goto fail; 837 } 838 839 /* allocate and read the key space */ 840 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); 841 ret.dsize = rec.key_len; 842 tdb_unlock(tdb, BUCKET(hash)); 843 return ret; 844 845 fail: 846 tdb_unlock(tdb, BUCKET(hash)); 847 return null_data; 848} 849 850/* find the next entry in the database, returning its key */ 851TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA key) 852{ 853 unsigned hash, hbucket; 854 tdb_off rec_ptr, offset; 855 struct list_struct rec; 856 TDB_DATA ret; 857 858 if (tdb == NULL) { 859#ifdef TDB_DEBUG 860 printf("tdb_nextkey() called with null context\n"); 861#endif 862 return null_data; 863 } 864 865 /* find which hash bucket it is in */ 866 hash = tdb_hash(&key); 867 hbucket = BUCKET(hash); 868 869 tdb_lock(tdb, hbucket); 870 rec_ptr = tdb_find(tdb, key, hash, &rec); 871 if (rec_ptr) { 872 /* we want the next record after this one */ 873 rec_ptr = rec.next; 874 } 875 876 /* not found or last in hash: look for next non-empty hash chain */ 877 while (rec_ptr == 0) { 878 tdb_unlock(tdb, hbucket); 879 880 if (++hbucket >= tdb->header.hash_size - 1) 881 return null_data; 882 883 offset = tdb_hash_top(tdb, hbucket); 884 tdb_lock(tdb, hbucket); 885 /* read in the hash top */ 886 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 887 tdb_unlock(tdb, hbucket); 888 return null_data; 889 } 890 } 891 892 /* Read the record. */ 893 if (rec_read(tdb, rec_ptr, &rec) == -1) { 894 tdb_unlock(tdb, hbucket); 895 return null_data; 896 } 897 /* allocate and read the key */ 898 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); 899 ret.dsize = rec.key_len; 900 tdb_unlock(tdb, hbucket); 901 902 return ret; 903} 904 905/* delete an entry in the database given a key */ 906int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key) 907{ 908 unsigned hash; 909 tdb_off offset, rec_ptr, last_ptr; 910 struct list_struct rec, lastrec; 911 char *data = NULL; 912 913 if (tdb == NULL) { 914#ifdef TDB_DEBUG 915 printf("tdb_delete() called with null context\n"); 916#endif 917 return -1; 918 } 919 920 /* find which hash bucket it is in */ 921 hash = tdb_hash(&key); 922 923 tdb_lock(tdb, BUCKET(hash)); 924 925 /* find the top of the hash chain */ 926 offset = tdb_hash_top(tdb, hash); 927 928 /* read in the hash top */ 929 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 930 goto fail; 931 } 932 933 last_ptr = 0; 934 935 /* keep looking until we find the right record */ 936 while (rec_ptr) { 937 if (rec_read(tdb, rec_ptr, &rec) == -1) { 938 goto fail; 939 } 940 941 if (hash == rec.full_hash && key.dsize == rec.key_len) { 942 /* a very likely hit - read the record and full key */ 943 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), 944 rec.key_len); 945 if (!data) { 946 goto fail; 947 } 948 949 if (memcmp(key.dptr, data, key.dsize) == 0) { 950 /* a definite match - delete it */ 951 if (last_ptr == 0) { 952 offset = tdb_hash_top(tdb, hash); 953 if (ofs_write(tdb, offset, &rec.next) == -1) { 954 goto fail; 955 } 956 } else { 957 lastrec.next = rec.next; 958 if (rec_write(tdb, last_ptr, &lastrec) == -1) { 959 goto fail; 960 } 961 } 962 tdb_unlock(tdb, BUCKET(hash)); 963 tdb_lock(tdb, -1); 964 /* and recover the space */ 965 offset = FREELIST_TOP; 966 if (ofs_read(tdb, offset, &rec.next) == -1) { 967 goto fail2; 968 } 969 rec.magic = TDB_FREE_MAGIC; 970 if (rec_write(tdb, rec_ptr, &rec) == -1) { 971 goto fail2; 972 } 973 if (ofs_write(tdb, offset, &rec_ptr) == -1) { 974 goto fail2; 975 } 976 977 /* yipee - all done */ 978 free(data); 979 tdb_unlock(tdb, -1); 980 return 0; 981 } 982 983 /* a miss - drat */ 984 free(data); 985 data = NULL; 986 } 987 988 /* move to the next record */ 989 last_ptr = rec_ptr; 990 lastrec = rec; 991 rec_ptr = rec.next; 992 } 993 994 fail: 995 if (data) free(data); 996 tdb_unlock(tdb, BUCKET(hash)); 997 return -1; 998 999 fail2: 1000 if (data) free(data); 1001 tdb_unlock(tdb, -1); 1002 return -1; 1003} 1004 1005 1006/* store an element in the database, replacing any existing element 1007 with the same key 1008 1009 return 0 on success, -1 on failure 1010*/ 1011int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) 1012{ 1013 struct list_struct rec; 1014 unsigned hash; 1015 tdb_off rec_ptr, offset; 1016 char *p = NULL; 1017 1018 if (tdb == NULL) { 1019#ifdef TDB_DEBUG 1020 printf("tdb_store() called with null context\n"); 1021#endif 1022 return -1; 1023 } 1024 1025 /* find which hash bucket it is in */ 1026 hash = tdb_hash(&key); 1027 1028 /* check for it existing */ 1029 if (flag == TDB_INSERT && tdb_exists(tdb, key)) { 1030 tdb->ecode = TDB_ERR_EXISTS; 1031 return -1; 1032 } 1033 1034 /* first try in-place update */ 1035 if (flag != TDB_INSERT && tdb_update(tdb, key, dbuf) == 0) { 1036 return 0; 1037 } 1038 1039 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize); 1040 if (rec_ptr == 0) { 1041 return -1; 1042 } 1043 1044 tdb_lock(tdb, BUCKET(hash)); 1045 1046 /* delete any existing record - if it doesn't exist we don't care */ 1047 if (flag != TDB_INSERT) { 1048 tdb_delete(tdb, key); 1049 } 1050 1051 /* read the newly created record */ 1052 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { 1053 goto fail; 1054 } 1055 1056 if (rec.magic != TDB_FREE_MAGIC) goto fail; 1057 1058 /* find the top of the hash chain */ 1059 offset = tdb_hash_top(tdb, hash); 1060 1061 /* read in the hash top diretcly into our next pointer */ 1062 if (ofs_read(tdb, offset, &rec.next) == -1) { 1063 goto fail; 1064 } 1065 1066 rec.key_len = key.dsize; 1067 rec.data_len = dbuf.dsize; 1068 rec.full_hash = hash; 1069 rec.magic = TDB_MAGIC; 1070 1071 p = (char *)malloc(sizeof(rec) + key.dsize + dbuf.dsize); 1072 if (!p) { 1073 tdb->ecode = TDB_ERR_OOM; 1074 goto fail; 1075 } 1076 1077 memcpy(p, &rec, sizeof(rec)); 1078 memcpy(p+sizeof(rec), key.dptr, key.dsize); 1079 memcpy(p+sizeof(rec)+key.dsize, dbuf.dptr, dbuf.dsize); 1080 1081 if (tdb_write(tdb, rec_ptr, p, sizeof(rec)+key.dsize+dbuf.dsize) == -1) 1082 goto fail; 1083 1084 free(p); 1085 p = NULL; 1086 1087 /* and point the top of the hash chain at it */ 1088 if (ofs_write(tdb, offset, &rec_ptr) == -1) goto fail; 1089 1090 tdb_unlock(tdb, BUCKET(hash)); 1091 return 0; 1092 1093 fail: 1094#if TDB_DEBUG 1095 printf("store failed for hash 0x%08x in bucket %u\n", hash, BUCKET(hash)); 1096#endif 1097 if (p) free(p); 1098 tdb_unlock(tdb, BUCKET(hash)); 1099 return -1; 1100} 1101 1102 1103/* open the database, creating it if necessary 1104 1105 The open_flags and mode are passed straight to the open call on the database 1106 file. A flags value of O_WRONLY is invalid 1107 1108 The hash size is advisory, use zero for a default value. 1109 1110 return is NULL on error 1111*/ 1112TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags, 1113 int open_flags, mode_t mode) 1114{ 1115 TDB_CONTEXT tdb, *ret; 1116 struct stat st; 1117 1118 memset(&tdb, 0, sizeof(tdb)); 1119 1120 tdb.fd = -1; 1121 tdb.name = NULL; 1122 tdb.map_ptr = NULL; 1123 1124 if ((open_flags & O_ACCMODE) == O_WRONLY) { 1125 goto fail; 1126 } 1127 1128 if (hash_size == 0) hash_size = DEFAULT_HASH_SIZE; 1129 1130 tdb.read_only = ((open_flags & O_ACCMODE) == O_RDONLY); 1131 1132 if (name != NULL) { 1133 tdb.fd = open(name, open_flags, mode); 1134 if (tdb.fd == -1) { 1135 goto fail; 1136 } 1137 (void) fcntl(tdb.fd, F_SETFD, FD_CLOEXEC); 1138 } 1139 1140 /* ensure there is only one process initialising at once */ 1141 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_SET, F_WRLCK, F_SETLKW); 1142 1143 if (tdb_flags & TDB_CLEAR_IF_FIRST) { 1144 /* we need to zero the database if we are the only 1145 one with it open */ 1146 if (tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_WRLCK, F_SETLK) == 0) { 1147 ftruncate(tdb.fd, 0); 1148 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLK); 1149 } 1150 } 1151 1152 /* leave this lock in place */ 1153 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_RDLCK, F_SETLKW); 1154 1155 if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header) || 1156 strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0 || 1157 tdb.header.version != TDB_VERSION) { 1158 /* its not a valid database - possibly initialise it */ 1159 if (!(open_flags & O_CREAT)) { 1160 goto fail; 1161 } 1162 if (tdb_new_database(&tdb, hash_size) == -1) goto fail; 1163 1164 lseek(tdb.fd, 0, SEEK_SET); 1165 if (tdb.fd != -1 && read(tdb.fd, &tdb.header, 1166 sizeof(tdb.header)) != 1167 sizeof(tdb.header)) 1168 goto fail; 1169 } 1170 1171 if (tdb.fd != -1) { 1172 fstat(tdb.fd, &st); 1173 1174 /* map the database and fill in the return structure */ 1175 tdb.name = (char *)strdup(name); 1176 tdb.map_size = st.st_size; 1177 } 1178 1179 tdb.locked = (int *)calloc(tdb.header.hash_size+1, 1180 sizeof(tdb.locked[0])); 1181 if (!tdb.locked) { 1182 goto fail; 1183 } 1184 1185#if HAVE_MMAP 1186 if (tdb.fd != -1) { 1187 tdb.map_ptr = (void *)mmap(NULL, st.st_size, 1188 tdb.read_only? PROT_READ : PROT_READ|PROT_WRITE, 1189 MAP_SHARED | MAP_FILE, tdb.fd, 0); 1190 if (tdb.map_ptr == MAP_FAILED) { 1191 tdb.map_ptr = NULL; 1192 } 1193 } 1194#endif 1195 1196 ret = (TDB_CONTEXT *)malloc(sizeof(tdb)); 1197 if (!ret) goto fail; 1198 1199 *ret = tdb; 1200 1201#if TDB_DEBUG 1202 printf("mapped database of hash_size %u map_size=%u\n", 1203 hash_size, tdb.map_size); 1204#endif 1205 1206 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLKW); 1207 return ret; 1208 1209 fail: 1210 if (tdb.name) free(tdb.name); 1211 if (tdb.fd != -1) close(tdb.fd); 1212 if (tdb.map_ptr) munmap(tdb.map_ptr, tdb.map_size); 1213 1214 return NULL; 1215} 1216 1217/* close a database */ 1218int tdb_close(TDB_CONTEXT *tdb) 1219{ 1220 if (!tdb) return -1; 1221 1222 if (tdb->name) free(tdb->name); 1223 if (tdb->fd != -1) close(tdb->fd); 1224 if (tdb->locked) free(tdb->locked); 1225 1226 if (tdb->map_ptr) { 1227 if (tdb->fd != -1) { 1228 munmap(tdb->map_ptr, tdb->map_size); 1229 } else { 1230 free(tdb->map_ptr); 1231 } 1232 } 1233 1234 memset(tdb, 0, sizeof(*tdb)); 1235 free(tdb); 1236 1237 return 0; 1238} 1239 1240/* lock the database. If we already have it locked then don't do anything */ 1241int tdb_writelock(TDB_CONTEXT *tdb) 1242{ 1243 if (tdb == NULL) { 1244#ifdef TDB_DEBUG 1245 printf("tdb_writelock() called with null context\n"); 1246#endif 1247 return -1; 1248 } 1249 1250 return tdb_lock(tdb, -1); 1251} 1252 1253/* unlock the database. */ 1254int tdb_writeunlock(TDB_CONTEXT *tdb) 1255{ 1256 if (tdb == NULL) { 1257#ifdef TDB_DEBUG 1258 printf("tdb_writeunlock() called with null context\n"); 1259#endif 1260 return -1; 1261 } 1262 1263 return tdb_unlock(tdb, -1); 1264} 1265 1266/* lock one hash chain. This is meant to be used to reduce locking 1267 contention - it cannot guarantee how many records will be locked */ 1268int tdb_lockchain(TDB_CONTEXT *tdb, TDB_DATA key) 1269{ 1270 if (tdb == NULL) { 1271#ifdef TDB_DEBUG 1272 printf("tdb_lockchain() called with null context\n"); 1273#endif 1274 return -1; 1275 } 1276 1277 return tdb_lock(tdb, BUCKET(tdb_hash(&key))); 1278} 1279 1280 1281/* unlock one hash chain */ 1282int tdb_unlockchain(TDB_CONTEXT *tdb, TDB_DATA key) 1283{ 1284 if (tdb == NULL) { 1285#ifdef TDB_DEBUG 1286 printf("tdb_unlockchain() called with null context\n"); 1287#endif 1288 return -1; 1289 } 1290 1291 return tdb_unlock(tdb, BUCKET(tdb_hash(&key))); 1292} 1293