1 /* 2 Unix SMB/CIFS implementation. 3 4 trivial database library 5 6 Copyright (C) Andrew Tridgell 1999-2005 7 Copyright (C) Paul `Rusty' Russell 2000 8 Copyright (C) Jeremy Allison 2000-2003 9 10 ** NOTE! The following LGPL license applies to the tdb 11 ** library. This does NOT imply that all of Samba is released 12 ** under the LGPL 13 14 This library is free software; you can redistribute it and/or 15 modify it under the terms of the GNU Lesser General Public 16 License as published by the Free Software Foundation; either 17 version 3 of the License, or (at your option) any later version. 18 19 This library is distributed in the hope that it will be useful, 20 but WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 Lesser General Public License for more details. 23 24 You should have received a copy of the GNU Lesser General Public 25 License along with this library; if not, see <http://www.gnu.org/licenses/>. 26*/ 27 28#include "tdb_private.h" 29 30/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */ 31static struct tdb_context *tdbs = NULL; 32 33 34/* This is based on the hash algorithm from gdbm */ 35static unsigned int default_tdb_hash(TDB_DATA *key) 36{ 37 uint32_t value; /* Used to compute the hash value. */ 38 uint32_t i; /* Used to cycle through random values. */ 39 40 /* Set the initial value from the key size. */ 41 for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) 42 value = (value + (key->dptr[i] << (i*5 % 24))); 43 44 return (1103515243 * value + 12345); 45} 46 47 48/* initialise a new database with a specified hash size */ 49static int tdb_new_database(struct tdb_context *tdb, int hash_size) 50{ 51 struct tdb_header *newdb; 52 size_t size; 53 int ret = -1; 54 ssize_t written; 55 56 /* We make it up in memory, then write it out if not internal */ 57 size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off_t); 58 if (!(newdb = (struct tdb_header *)calloc(size, 1))) { 59 tdb->ecode = TDB_ERR_OOM; 60 return -1; 61 } 62 63 /* Fill in the header */ 64 newdb->version = TDB_VERSION; 65 newdb->hash_size = hash_size; 66 if (tdb->flags & TDB_INTERNAL) { 67 tdb->map_size = size; 68 tdb->map_ptr = (char *)newdb; 69 memcpy(&tdb->header, newdb, sizeof(tdb->header)); 70 /* Convert the `ondisk' version if asked. */ 71 CONVERT(*newdb); 72 return 0; 73 } 74 if (lseek(tdb->fd, 0, SEEK_SET) == -1) 75 goto fail; 76 77 if (ftruncate(tdb->fd, 0) == -1) 78 goto fail; 79 80 /* This creates an endian-converted header, as if read from disk */ 81 CONVERT(*newdb); 82 memcpy(&tdb->header, newdb, sizeof(tdb->header)); 83 /* Don't endian-convert the magic food! */ 84 memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); 85 /* we still have "ret == -1" here */ 86 written = write(tdb->fd, newdb, size); 87 if (written == size) { 88 ret = 0; 89 } else if (written != -1) { 90 /* call write once again, this usually should return -1 and 91 * set errno appropriately */ 92 size -= written; 93 written = write(tdb->fd, newdb+written, size); 94 if (written == size) { 95 ret = 0; 96 } else if (written >= 0) { 97 /* a second incomplete write - we give up. 98 * guessing the errno... */ 99 errno = ENOSPC; 100 } 101 } 102 103 fail: 104 SAFE_FREE(newdb); 105 return ret; 106} 107 108 109 110static int tdb_already_open(dev_t device, 111 ino_t ino) 112{ 113 struct tdb_context *i; 114 115 for (i = tdbs; i; i = i->next) { 116 if (i->device == device && i->inode == ino) { 117 return 1; 118 } 119 } 120 121 return 0; 122} 123 124/* open the database, creating it if necessary 125 126 The open_flags and mode are passed straight to the open call on the 127 database file. A flags value of O_WRONLY is invalid. The hash size 128 is advisory, use zero for a default value. 129 130 Return is NULL on error, in which case errno is also set. Don't 131 try to call tdb_error or tdb_errname, just do strerror(errno). 132 133 @param name may be NULL for internal databases. */ 134struct tdb_context *tdb_open(const char *name, int hash_size, int tdb_flags, 135 int open_flags, mode_t mode) 136{ 137 return tdb_open_ex(name, hash_size, tdb_flags, open_flags, mode, NULL, NULL); 138} 139 140/* a default logging function */ 141static void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, const char *fmt, ...) PRINTF_ATTRIBUTE(3, 4); 142static void null_log_fn(struct tdb_context *tdb, enum tdb_debug_level level, const char *fmt, ...) 143{ 144} 145 146 147struct tdb_context *tdb_open_ex(const char *name, int hash_size, int tdb_flags, 148 int open_flags, mode_t mode, 149 const struct tdb_logging_context *log_ctx, 150 tdb_hash_func hash_fn) 151{ 152 struct tdb_context *tdb; 153 struct stat st; 154 int rev = 0, locked = 0; 155 unsigned char *vp; 156 uint32_t vertest; 157 unsigned v; 158 159 if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) { 160 /* Can't log this */ 161 errno = ENOMEM; 162 goto fail; 163 } 164 tdb_io_init(tdb); 165 tdb->fd = -1; 166#ifdef TDB_TRACE 167 tdb->tracefd = -1; 168#endif 169 tdb->name = NULL; 170 tdb->map_ptr = NULL; 171 tdb->flags = tdb_flags; 172 tdb->open_flags = open_flags; 173 if (log_ctx) { 174 tdb->log = *log_ctx; 175 } else { 176 tdb->log.log_fn = null_log_fn; 177 tdb->log.log_private = NULL; 178 } 179 tdb->hash_fn = hash_fn ? hash_fn : default_tdb_hash; 180 181 /* cache the page size */ 182 tdb->page_size = getpagesize(); 183 if (tdb->page_size <= 0) { 184 tdb->page_size = 0x2000; 185 } 186 187 tdb->max_dead_records = (tdb_flags & TDB_VOLATILE) ? 5 : 0; 188 189 if ((open_flags & O_ACCMODE) == O_WRONLY) { 190 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: can't open tdb %s write-only\n", 191 name)); 192 errno = EINVAL; 193 goto fail; 194 } 195 196 if (hash_size == 0) 197 hash_size = DEFAULT_HASH_SIZE; 198 if ((open_flags & O_ACCMODE) == O_RDONLY) { 199 tdb->read_only = 1; 200 /* read only databases don't do locking or clear if first */ 201 tdb->flags |= TDB_NOLOCK; 202 tdb->flags &= ~TDB_CLEAR_IF_FIRST; 203 } 204 205 if ((tdb->flags & TDB_ALLOW_NESTING) && 206 (tdb->flags & TDB_DISALLOW_NESTING)) { 207 tdb->ecode = TDB_ERR_NESTING; 208 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 209 "allow_nesting and disallow_nesting are not allowed together!")); 210 errno = EINVAL; 211 goto fail; 212 } 213 214 /* 215 * TDB_ALLOW_NESTING is the default behavior. 216 * Note: this may change in future versions! 217 */ 218 if (!(tdb->flags & TDB_DISALLOW_NESTING)) { 219 tdb->flags |= TDB_ALLOW_NESTING; 220 } 221 222 /* internal databases don't mmap or lock, and start off cleared */ 223 if (tdb->flags & TDB_INTERNAL) { 224 tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP); 225 tdb->flags &= ~TDB_CLEAR_IF_FIRST; 226 if (tdb_new_database(tdb, hash_size) != 0) { 227 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: tdb_new_database failed!")); 228 goto fail; 229 } 230 goto internal; 231 } 232 233 if ((tdb->fd = open(name, open_flags, mode)) == -1) { 234 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_open_ex: could not open file %s: %s\n", 235 name, strerror(errno))); 236 goto fail; /* errno set by open(2) */ 237 } 238 239 /* on exec, don't inherit the fd */ 240 v = fcntl(tdb->fd, F_GETFD, 0); 241 fcntl(tdb->fd, F_SETFD, v | FD_CLOEXEC); 242 243 /* ensure there is only one process initialising at once */ 244 if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) { 245 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to get global lock on %s: %s\n", 246 name, strerror(errno))); 247 goto fail; /* errno set by tdb_brlock */ 248 } 249 250 /* we need to zero database if we are the only one with it open */ 251 if ((tdb_flags & TDB_CLEAR_IF_FIRST) && 252 (!tdb->read_only) && 253 (locked = (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_WRLCK, F_SETLK, 0, 1) == 0))) { 254 open_flags |= O_CREAT; 255 if (ftruncate(tdb->fd, 0) == -1) { 256 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 257 "failed to truncate %s: %s\n", 258 name, strerror(errno))); 259 goto fail; /* errno set by ftruncate */ 260 } 261 } 262 263 errno = 0; 264 if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header) 265 || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) { 266 if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == -1) { 267 if (errno == 0) { 268 errno = EIO; /* ie bad format or something */ 269 } 270 goto fail; 271 } 272 rev = (tdb->flags & TDB_CONVERT); 273 } else if (tdb->header.version != TDB_VERSION 274 && !(rev = (tdb->header.version==TDB_BYTEREV(TDB_VERSION)))) { 275 /* wrong version */ 276 errno = EIO; 277 goto fail; 278 } 279 vp = (unsigned char *)&tdb->header.version; 280 vertest = (((uint32_t)vp[0]) << 24) | (((uint32_t)vp[1]) << 16) | 281 (((uint32_t)vp[2]) << 8) | (uint32_t)vp[3]; 282 tdb->flags |= (vertest==TDB_VERSION) ? TDB_BIGENDIAN : 0; 283 if (!rev) 284 tdb->flags &= ~TDB_CONVERT; 285 else { 286 tdb->flags |= TDB_CONVERT; 287 tdb_convert(&tdb->header, sizeof(tdb->header)); 288 } 289 if (fstat(tdb->fd, &st) == -1) 290 goto fail; 291 292 if (tdb->header.rwlocks != 0) { 293 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); 294 goto fail; 295 } 296 297 /* Is it already in the open list? If so, fail. */ 298 if (tdb_already_open(st.st_dev, st.st_ino)) { 299 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 300 "%s (%d,%d) is already open in this process\n", 301 name, (int)st.st_dev, (int)st.st_ino)); 302 errno = EBUSY; 303 goto fail; 304 } 305 306 if (!(tdb->name = (char *)strdup(name))) { 307 errno = ENOMEM; 308 goto fail; 309 } 310 311 tdb->map_size = st.st_size; 312 tdb->device = st.st_dev; 313 tdb->inode = st.st_ino; 314 tdb_mmap(tdb); 315 if (locked) { 316 if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_UNLCK, F_SETLK, 0, 1) == -1) { 317 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 318 "failed to take ACTIVE_LOCK on %s: %s\n", 319 name, strerror(errno))); 320 goto fail; 321 } 322 323 } 324 325 /* We always need to do this if the CLEAR_IF_FIRST flag is set, even if 326 we didn't get the initial exclusive lock as we need to let all other 327 users know we're using it. */ 328 329 if (tdb_flags & TDB_CLEAR_IF_FIRST) { 330 /* leave this lock in place to indicate it's in use */ 331 if (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1) 332 goto fail; 333 } 334 335 /* if needed, run recovery */ 336 if (tdb_transaction_recover(tdb) == -1) { 337 goto fail; 338 } 339 340#ifdef TDB_TRACE 341 { 342 char tracefile[strlen(name) + 32]; 343 344 snprintf(tracefile, sizeof(tracefile), 345 "%s.trace.%li", name, (long)getpid()); 346 tdb->tracefd = open(tracefile, O_WRONLY|O_CREAT|O_EXCL, 0600); 347 if (tdb->tracefd >= 0) { 348 tdb_enable_seqnum(tdb); 349 tdb_trace_open(tdb, "tdb_open", hash_size, tdb_flags, 350 open_flags); 351 } else 352 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to open trace file %s!\n", tracefile)); 353 } 354#endif 355 356 internal: 357 /* Internal (memory-only) databases skip all the code above to 358 * do with disk files, and resume here by releasing their 359 * global lock and hooking into the active list. */ 360 if (tdb->methods->tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1) == -1) 361 goto fail; 362 tdb->next = tdbs; 363 tdbs = tdb; 364 return tdb; 365 366 fail: 367 { int save_errno = errno; 368 369 if (!tdb) 370 return NULL; 371 372#ifdef TDB_TRACE 373 close(tdb->tracefd); 374#endif 375 if (tdb->map_ptr) { 376 if (tdb->flags & TDB_INTERNAL) 377 SAFE_FREE(tdb->map_ptr); 378 else 379 tdb_munmap(tdb); 380 } 381 SAFE_FREE(tdb->name); 382 if (tdb->fd != -1) 383 if (close(tdb->fd) != 0) 384 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: failed to close tdb->fd on error!\n")); 385 SAFE_FREE(tdb); 386 errno = save_errno; 387 return NULL; 388 } 389} 390 391/* 392 * Set the maximum number of dead records per hash chain 393 */ 394 395void tdb_set_max_dead(struct tdb_context *tdb, int max_dead) 396{ 397 tdb->max_dead_records = max_dead; 398} 399 400/** 401 * Close a database. 402 * 403 * @returns -1 for error; 0 for success. 404 **/ 405int tdb_close(struct tdb_context *tdb) 406{ 407 struct tdb_context **i; 408 int ret = 0; 409 410 tdb_trace(tdb, "tdb_close"); 411 if (tdb->transaction) { 412 _tdb_transaction_cancel(tdb); 413 } 414 415 if (tdb->map_ptr) { 416 if (tdb->flags & TDB_INTERNAL) 417 SAFE_FREE(tdb->map_ptr); 418 else 419 tdb_munmap(tdb); 420 } 421 SAFE_FREE(tdb->name); 422 if (tdb->fd != -1) { 423 ret = close(tdb->fd); 424 tdb->fd = -1; 425 } 426 SAFE_FREE(tdb->lockrecs); 427 428 /* Remove from contexts list */ 429 for (i = &tdbs; *i; i = &(*i)->next) { 430 if (*i == tdb) { 431 *i = tdb->next; 432 break; 433 } 434 } 435 436#ifdef TDB_TRACE 437 close(tdb->tracefd); 438#endif 439 memset(tdb, 0, sizeof(*tdb)); 440 SAFE_FREE(tdb); 441 442 return ret; 443} 444 445/* register a loging function */ 446void tdb_set_logging_function(struct tdb_context *tdb, 447 const struct tdb_logging_context *log_ctx) 448{ 449 tdb->log = *log_ctx; 450} 451 452void *tdb_get_logging_private(struct tdb_context *tdb) 453{ 454 return tdb->log.log_private; 455} 456 457static int tdb_reopen_internal(struct tdb_context *tdb, bool active_lock) 458{ 459#if !defined(LIBREPLACE_PREAD_NOT_REPLACED) || \ 460 !defined(LIBREPLACE_PWRITE_NOT_REPLACED) 461 struct stat st; 462#endif 463 464 if (tdb->flags & TDB_INTERNAL) { 465 return 0; /* Nothing to do. */ 466 } 467 468 if (tdb->num_locks != 0 || tdb->global_lock.count) { 469 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed with locks held\n")); 470 goto fail; 471 } 472 473 if (tdb->transaction != 0) { 474 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_reopen: reopen not allowed inside a transaction\n")); 475 goto fail; 476 } 477 478/* If we have real pread & pwrite, we can skip reopen. */ 479#if !defined(LIBREPLACE_PREAD_NOT_REPLACED) || \ 480 !defined(LIBREPLACE_PWRITE_NOT_REPLACED) 481 if (tdb_munmap(tdb) != 0) { 482 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: munmap failed (%s)\n", strerror(errno))); 483 goto fail; 484 } 485 if (close(tdb->fd) != 0) 486 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: WARNING closing tdb->fd failed!\n")); 487 tdb->fd = open(tdb->name, tdb->open_flags & ~(O_CREAT|O_TRUNC), 0); 488 if (tdb->fd == -1) { 489 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: open failed (%s)\n", strerror(errno))); 490 goto fail; 491 } 492 if (fstat(tdb->fd, &st) != 0) { 493 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno))); 494 goto fail; 495 } 496 if (st.st_ino != tdb->inode || st.st_dev != tdb->device) { 497 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: file dev/inode has changed!\n")); 498 goto fail; 499 } 500 tdb_mmap(tdb); 501#endif /* fake pread or pwrite */ 502 503 if (active_lock && 504 (tdb->methods->tdb_brlock(tdb, ACTIVE_LOCK, F_RDLCK, F_SETLKW, 0, 1) == -1)) { 505 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: failed to obtain active lock\n")); 506 goto fail; 507 } 508 509 return 0; 510 511fail: 512 tdb_close(tdb); 513 return -1; 514} 515 516/* reopen a tdb - this can be used after a fork to ensure that we have an independent 517 seek pointer from our parent and to re-establish locks */ 518int tdb_reopen(struct tdb_context *tdb) 519{ 520 return tdb_reopen_internal(tdb, tdb->flags & TDB_CLEAR_IF_FIRST); 521} 522 523/* reopen all tdb's */ 524int tdb_reopen_all(int parent_longlived) 525{ 526 struct tdb_context *tdb; 527 528 for (tdb=tdbs; tdb; tdb = tdb->next) { 529 bool active_lock = (tdb->flags & TDB_CLEAR_IF_FIRST); 530 531 /* 532 * If the parent is longlived (ie. a 533 * parent daemon architecture), we know 534 * it will keep it's active lock on a 535 * tdb opened with CLEAR_IF_FIRST. Thus 536 * for child processes we don't have to 537 * add an active lock. This is essential 538 * to improve performance on systems that 539 * keep POSIX locks as a non-scalable data 540 * structure in the kernel. 541 */ 542 if (parent_longlived) { 543 /* Ensure no clear-if-first. */ 544 active_lock = false; 545 } 546 547 if (tdb_reopen_internal(tdb, active_lock) != 0) 548 return -1; 549 } 550 551 return 0; 552} 553