/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2009 Oracle.  All rights reserved.
 *
 * $Id$
 */

#ifndef _DB_INT_H_
#define	_DB_INT_H_

/*******************************************************
 * Berkeley DB ANSI/POSIX include files.
 *
 * Everything down to the matching #endif is skipped unless
 * HAVE_SYSTEM_INCLUDE_FILES is defined.
 *******************************************************/
#ifdef HAVE_SYSTEM_INCLUDE_FILES
#include <sys/types.h>
#ifdef DIAG_MVCC
#include <sys/mman.h>
#endif
#include <sys/stat.h>

/* select(2) headers are only pulled in when __INCLUDE_SELECT_H is defined. */
#if defined(__INCLUDE_SELECT_H)
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#ifdef HAVE_VXWORKS
#include <selectLib.h>
#endif
#endif

/* Include <sys/time.h>, <time.h>, or both, as the configuration allows. */
#if TIME_WITH_SYS_TIME
#include <sys/time.h>
#include <time.h>
#else
#if HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <time.h>
#endif
#endif

#ifdef HAVE_VXWORKS
#include <net/uio.h>
#else
#include <sys/uio.h>
#endif

/* Socket headers are only pulled in when __INCLUDE_NETWORKING is defined. */
#if defined(__INCLUDE_NETWORKING)
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#include <netinet/in.h>
#include <netdb.h>
#include <arpa/inet.h>
#endif

#if defined(STDC_HEADERS) || defined(__cplusplus)
#include <stdarg.h>
#else
#include <varargs.h>
#endif

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Directory headers are only pulled in when __INCLUDE_DIRECTORY is defined.
 *
 * NAMLEN(dirent) yields the length of a directory entry's name: strlen() of
 * d_name when <dirent.h> is available, the d_namlen field otherwise (in which
 * case "dirent" is also mapped onto "direct").
 */
#if defined(__INCLUDE_DIRECTORY)
#if HAVE_DIRENT_H
# include <dirent.h>
# define NAMLEN(dirent) strlen((dirent)->d_name)
#else
# define dirent direct
# define NAMLEN(dirent) (dirent)->d_namlen
# if HAVE_SYS_NDIR_H
#  include <sys/ndir.h>
# endif
# if HAVE_SYS_DIR_H
#  include <sys/dir.h>
# endif
# if HAVE_NDIR_H
#  include <ndir.h>
# endif
#endif
#endif /* __INCLUDE_DIRECTORY */

#endif /* HAVE_SYSTEM_INCLUDE_FILES */

#ifdef DB_WIN32
#include "dbinc/win_db.h"
#endif

#include "db.h"
#include "clib_port.h"

#include "dbinc/queue.h"
#include "dbinc/shqueue.h"

#if defined(__cplusplus)
extern "C" {
#endif

/*******************************************************
 * Forward structure declarations.
 *******************************************************/
struct __db_reginfo_t;	typedef struct __db_reginfo_t REGINFO;
struct __db_txnhead;	typedef struct __db_txnhead DB_TXNHEAD;
struct __db_txnlist;	typedef struct __db_txnlist DB_TXNLIST;
struct __vrfy_childinfo;typedef struct __vrfy_childinfo VRFY_CHILDINFO;
struct __vrfy_dbinfo;   typedef struct __vrfy_dbinfo VRFY_DBINFO;
struct __vrfy_pageinfo; typedef struct __vrfy_pageinfo VRFY_PAGEINFO;

/* Head of a shared-memory tail queue, used as a hash table bucket type. */
typedef SH_TAILQ_HEAD(__hash_head) DB_HASHTAB;

/*******************************************************
 * General purpose constants and macros.
 *******************************************************/
/* Force our own boolean constants, overriding any earlier definition. */
#undef	FALSE
#define	FALSE		0
#undef	TRUE
#define	TRUE		(!FALSE)

#define	MEGABYTE	1048576		/* 2^20 */
#define	GIGABYTE	1073741824	/* 2^30 */

#define	NS_PER_MS	1000000		/* Nanoseconds in a millisecond */
#define	NS_PER_US	1000		/* Nanoseconds in a microsecond */
#define	NS_PER_SEC	1000000000	/* Nanoseconds in a second */
#define	US_PER_MS	1000		/* Microseconds in a millisecond */
#define	US_PER_SEC	1000000		/* Microseconds in a second */
#define	MS_PER_SEC	1000		/* Milliseconds in a second */

#define	RECNO_OOB	0		/* Illegal record number. */

/*
 * Test for a power-of-two (tests true for zero, which doesn't matter here).
 * The argument is evaluated twice.
 */
#define	POWER_OF_TWO(x)	(((x) & ((x) - 1)) == 0)

/*
 * Test for valid page sizes: a power of two in [DB_MIN_PGSIZE, DB_MAX_PGSIZE].
 * Zero passes POWER_OF_TWO but is rejected here by the lower bound.  The
 * argument is evaluated more than once.
 */
#define	DB_MIN_PGSIZE	0x000200	/* Minimum page size (512). */
#define	DB_MAX_PGSIZE	0x010000	/* Maximum page size (65536). */
#define	IS_VALID_PAGESIZE(x)						\
	(POWER_OF_TWO(x) && (x) >= DB_MIN_PGSIZE && ((x) <= DB_MAX_PGSIZE))

/* Minimum number of pages cached, by default.
 */
#define	DB_MINPAGECACHE	16

/*
 * If we are unable to determine the underlying filesystem block size, use
 * 8K on the grounds that most OS's use less than 8K for a VM page size.
 */
#define	DB_DEF_IOSIZE	(8 * 1024)

/*
 * Align an integer to a specific boundary (rounds v up).  The result is
 * computed in uintmax_t because of the cast applied to the mask.  The mask
 * arithmetic only rounds correctly when bound is a power of two.
 */
#undef	DB_ALIGN
#define	DB_ALIGN(v, bound)						\
	(((v) + (bound) - 1) & ~(((uintmax_t)(bound)) - 1))

/*
 * Increment a pointer to a specific boundary.  Yields a void *; the same
 * power-of-two restriction on bound applies.
 */
#undef	ALIGNP_INC
#define	ALIGNP_INC(p, bound)						\
	(void *)(((uintptr_t)(p) + (bound) - 1) & ~(((uintptr_t)(bound)) - 1))

/*
 * Print an address as a u_long (a u_long is the largest type we can print
 * portably).  Most 64-bit systems have made longs 64-bits, so this should
 * work.
 */
#define	P_TO_ULONG(p)	((u_long)(uintptr_t)(p))

/*
 * Convert a pointer to a small integral value.
 *
 * The (u_int16_t)(uintptr_t) cast avoids warnings: the (uintptr_t) cast
 * converts the value to an integral type, and the (u_int16_t) cast converts
 * it to a small integral type so we don't get complaints when we assign the
 * final result to an integral type smaller than uintptr_t.  P_TO_UINT32
 * applies the same two-step cast with a 32-bit target.
 */
#define	P_TO_UINT32(p)	((u_int32_t)(uintptr_t)(p))
#define	P_TO_UINT16(p)	((u_int16_t)(uintptr_t)(p))

/*
 * There are several on-page structures that are declared to have a number of
 * fields followed by a variable length array of items.  The structure size
 * without including the variable length array or the address of the first of
 * those elements can be found using SSZ.
 *
 * This macro can also be used to find the offset of a structure element in a
 * structure.  This is used in various places to copy structure elements from
 * unaligned memory references, e.g., pointers into a packed page.
 *
 * There are two versions because compilers object if you take the address of
 * an array.
199 */ 200#undef SSZ 201#define SSZ(name, field) P_TO_UINT16(&(((name *)0)->field)) 202 203#undef SSZA 204#define SSZA(name, field) P_TO_UINT16(&(((name *)0)->field[0])) 205 206/* Structure used to print flag values. */ 207typedef struct __fn { 208 u_int32_t mask; /* Flag value. */ 209 const char *name; /* Flag name. */ 210} FN; 211 212/* Set, clear and test flags. */ 213#define FLD_CLR(fld, f) (fld) &= ~(f) 214#define FLD_ISSET(fld, f) ((fld) & (f)) 215#define FLD_SET(fld, f) (fld) |= (f) 216#define F_CLR(p, f) (p)->flags &= ~(f) 217#define F_ISSET(p, f) ((p)->flags & (f)) 218#define F_SET(p, f) (p)->flags |= (f) 219#define LF_CLR(f) ((flags) &= ~(f)) 220#define LF_ISSET(f) ((flags) & (f)) 221#define LF_SET(f) ((flags) |= (f)) 222 223/* 224 * Calculate a percentage. The values can overflow 32-bit integer arithmetic 225 * so we use floating point. 226 * 227 * When calculating a bytes-vs-page size percentage, we're getting the inverse 228 * of the percentage in all cases, that is, we want 100 minus the percentage we 229 * calculate. 230 */ 231#define DB_PCT(v, total) \ 232 ((int)((total) == 0 ? 0 : ((double)(v) * 100) / (total))) 233#define DB_PCT_PG(v, total, pgsize) \ 234 ((int)((total) == 0 ? 0 : \ 235 100 - ((double)(v) * 100) / (((double)total) * (pgsize)))) 236 237/* 238 * Statistics update shared memory and so are expensive -- don't update the 239 * values unless we're going to display the results. 240 */ 241#undef STAT 242#ifdef HAVE_STATISTICS 243#define STAT(x) x 244#else 245#define STAT(x) 246#endif 247 248/* 249 * Structure used for callback message aggregation. 250 * 251 * Display values in XXX_stat_print calls. 252 */ 253typedef struct __db_msgbuf { 254 char *buf; /* Heap allocated buffer. */ 255 char *cur; /* Current end of message. */ 256 size_t len; /* Allocated length of buffer. 
*/ 257} DB_MSGBUF; 258#define DB_MSGBUF_INIT(a) do { \ 259 (a)->buf = (a)->cur = NULL; \ 260 (a)->len = 0; \ 261} while (0) 262#define DB_MSGBUF_FLUSH(env, a) do { \ 263 if ((a)->buf != NULL) { \ 264 if ((a)->cur != (a)->buf) \ 265 __db_msg(env, "%s", (a)->buf); \ 266 __os_free(env, (a)->buf); \ 267 DB_MSGBUF_INIT(a); \ 268 } \ 269} while (0) 270#define STAT_FMT(msg, fmt, type, v) do { \ 271 DB_MSGBUF __mb; \ 272 DB_MSGBUF_INIT(&__mb); \ 273 __db_msgadd(env, &__mb, fmt, (type)(v)); \ 274 __db_msgadd(env, &__mb, "\t%s", msg); \ 275 DB_MSGBUF_FLUSH(env, &__mb); \ 276} while (0) 277#define STAT_HEX(msg, v) \ 278 __db_msg(env, "%#lx\t%s", (u_long)(v), msg) 279#define STAT_ISSET(msg, p) \ 280 __db_msg(env, "%sSet\t%s", (p) == NULL ? "!" : " ", msg) 281#define STAT_LONG(msg, v) \ 282 __db_msg(env, "%ld\t%s", (long)(v), msg) 283#define STAT_LSN(msg, lsnp) \ 284 __db_msg(env, "%lu/%lu\t%s", \ 285 (u_long)(lsnp)->file, (u_long)(lsnp)->offset, msg) 286#define STAT_POINTER(msg, v) \ 287 __db_msg(env, "%#lx\t%s", P_TO_ULONG(v), msg) 288#define STAT_STRING(msg, p) do { \ 289 const char *__p = p; /* p may be a function call. */ \ 290 __db_msg(env, "%s\t%s", __p == NULL ? "!Set" : __p, msg); \ 291} while (0) 292#define STAT_ULONG(msg, v) \ 293 __db_msg(env, "%lu\t%s", (u_long)(v), msg) 294 295/* 296 * There are quite a few places in Berkeley DB where we want to initialize 297 * a DBT from a string or other random pointer type, using a length typed 298 * to size_t in most cases. This macro avoids a lot of casting. The macro 299 * comes in two flavors because we often want to clear the DBT first. 
 */
/* Point an existing DBT at (d, s) without touching its other fields. */
#define	DB_SET_DBT(dbt, d, s)  do {					\
	(dbt).data = (void *)(d);					\
	(dbt).size = (u_int32_t)(s);					\
} while (0)
/* Zero the whole DBT first, then point it at (d, s). */
#define	DB_INIT_DBT(dbt, d, s)  do {					\
	memset(&(dbt), 0, sizeof(dbt));					\
	DB_SET_DBT(dbt, d, s);						\
} while (0)

/*******************************************************
 * API return values
 *******************************************************/
/*
 * Return values that are OK for each different call.  For most calls the
 * only OK return value is 0, but some, like db->get, can return a value
 * such as DB_NOTFOUND that is not really an error.
 *
 * Each macro evaluates to non-zero when ret is one of the values accepted
 * for that call; ret is evaluated more than once.
 */
#define	DB_RETOK_STD(ret)	((ret) == 0)
#define	DB_RETOK_DBCDEL(ret)	((ret) == 0 || (ret) == DB_KEYEMPTY || \
				    (ret) == DB_NOTFOUND)
#define	DB_RETOK_DBCGET(ret)	((ret) == 0 || (ret) == DB_KEYEMPTY || \
				    (ret) == DB_NOTFOUND)
#define	DB_RETOK_DBCPUT(ret)	((ret) == 0 || (ret) == DB_KEYEXIST || \
				    (ret) == DB_NOTFOUND)
#define	DB_RETOK_DBDEL(ret)	DB_RETOK_DBCDEL(ret)
#define	DB_RETOK_DBGET(ret)	DB_RETOK_DBCGET(ret)
#define	DB_RETOK_DBPUT(ret)	((ret) == 0 || (ret) == DB_KEYEXIST)
#define	DB_RETOK_EXISTS(ret)	DB_RETOK_DBCGET(ret)
#define	DB_RETOK_LGGET(ret)	((ret) == 0 || (ret) == DB_NOTFOUND)
#define	DB_RETOK_MPGET(ret)	((ret) == 0 || (ret) == DB_PAGE_NOTFOUND)
#define	DB_RETOK_REPPMSG(ret)	((ret) == 0 || \
				    (ret) == DB_REP_IGNORE || \
				    (ret) == DB_REP_ISPERM || \
				    (ret) == DB_REP_NEWMASTER || \
				    (ret) == DB_REP_NEWSITE || \
				    (ret) == DB_REP_NOTPERM)
#define	DB_RETOK_REPMGR_START(ret) ((ret) == 0 || (ret) == DB_REP_IGNORE)

/*
 * Find a reasonable operation-not-supported error: EOPNOTSUPP if the
 * platform defines it, else ENOTSUP, else fall back to EINVAL.
 */
#ifdef	EOPNOTSUPP
#define	DB_OPNOTSUP	EOPNOTSUPP
#else
#ifdef	ENOTSUP
#define	DB_OPNOTSUP	ENOTSUP
#else
#define	DB_OPNOTSUP	EINVAL
#endif
#endif

/*******************************************************
 * Files.
 *******************************************************/
/*
 * We use 1024 as the maximum path length.  It's too hard to figure out what
 * the real path length is, as it was traditionally stored in <sys/param.h>,
 * and that file isn't always available.
 */
#define	DB_MAXPATHLEN	1024

#define	PATH_DOT	"."	/* Current working directory. */
				/*
				 * Path separator character(s).  The value is
				 * an @...@ placeholder, to be substituted
				 * when this template is turned into a header.
				 */
#define	PATH_SEPARATOR	"@PATH_SEPARATOR@"

/*******************************************************
 * Environment.
 *******************************************************/
/* Type passed to __db_appname(). */
typedef enum {
	DB_APP_NONE=0,			/* No type (region). */
	DB_APP_DATA,			/* Data file. */
	DB_APP_LOG,			/* Log file. */
	DB_APP_TMP,			/* Temporary file. */
	DB_APP_RECOVER			/* We are in recovery. */
} APPNAME;

/*
 * A set of macros to check if various functionality has been configured.
 *
 * ALIVE_ON	The is_alive function is configured.
 * CDB_LOCKING	CDB product locking.
 * CRYPTO_ON	Security has been configured.
 * LOCKING_ON	Locking has been configured.
 * LOGGING_ON	Logging has been configured.
 * MUTEX_ON	Mutexes have been configured.
 * MPOOL_ON	Memory pool has been configured.
 * REP_ON	Replication has been configured.
 * RPC_ON	RPC has been configured.
 * TXN_ON	Transactions have been configured.
 *
 * REP_ON is more complex than most: if the BDB library was compiled without
 * replication support, ENV->rep_handle will be NULL; if the BDB library has
 * replication support, but it was not configured, the region reference will
 * be NULL.
 */
/*
 * All of these take the internal ENV handle, except RPC_ON, whose parameter
 * is named dbenv and which tests a cl_handle member.
 */
#define	ALIVE_ON(env)		((env)->dbenv->is_alive != NULL)
#define	CDB_LOCKING(env)	F_ISSET(env, ENV_CDB)
#define	CRYPTO_ON(env)		((env)->crypto_handle != NULL)
#define	LOCKING_ON(env)		((env)->lk_handle != NULL)
#define	LOGGING_ON(env)		((env)->lg_handle != NULL)
#define	MPOOL_ON(env)		((env)->mp_handle != NULL)
#define	MUTEX_ON(env)		((env)->mutex_handle != NULL)
#define	REP_ON(env)							\
	((env)->rep_handle != NULL && (env)->rep_handle->region != NULL)
#define	RPC_ON(dbenv)		((dbenv)->cl_handle != NULL)
#define	TXN_ON(env)		((env)->tx_handle != NULL)

/*
 * STD_LOCKING	Standard locking, that is, locking was configured and CDB
 *		was not.  We do not do locking in off-page duplicate trees,
 *		so we check for that in the cursor first.
 */
#define	STD_LOCKING(dbc)						\
	(!F_ISSET(dbc, DBC_OPD) &&					\
	    !CDB_LOCKING((dbc)->env) && LOCKING_ON((dbc)->env))

/*
 * IS_RECOVERING: The system is running recovery.
 *
 * True only when logging is configured and the log handle has DBLOG_RECOVER
 * set.
 */
#define	IS_RECOVERING(env)						\
	(LOGGING_ON(env) && F_ISSET((env)->lg_handle, DBLOG_RECOVER))

/*
 * Initialization methods are often illegal before/after open is called.
 *
 * NOTE: these expand to a bare "if (...) return (...);" statement, including
 * the trailing semicolon.  They therefore return from the enclosing function
 * and must not be used as the body of an unbraced if that has an else.
 */
#define	ENV_ILLEGAL_AFTER_OPEN(env, name)				\
	if (F_ISSET((env), ENV_OPEN_CALLED))				\
		return (__db_mi_open(env, name, 1));
#define	ENV_ILLEGAL_BEFORE_OPEN(env, name)				\
	if (!F_ISSET((env), ENV_OPEN_CALLED))				\
		return (__db_mi_open(env, name, 0));

/* We're not actually user hostile, honest.
*/ 431#define ENV_REQUIRES_CONFIG(env, handle, i, flags) \ 432 if (handle == NULL) \ 433 return (__env_not_config(env, i, flags)); 434#define ENV_REQUIRES_CONFIG_XX(env, handle, i, flags) \ 435 if ((env)->handle->region == NULL) \ 436 return (__env_not_config(env, i, flags)); 437#define ENV_NOT_CONFIGURED(env, handle, i, flags) \ 438 if (F_ISSET((env), ENV_OPEN_CALLED)) \ 439 ENV_REQUIRES_CONFIG(env, handle, i, flags) 440 441#define ENV_ENTER(env, ip) do { \ 442 int __ret; \ 443 PANIC_CHECK(env); \ 444 if ((env)->thr_hashtab == NULL) \ 445 ip = NULL; \ 446 else { \ 447 if ((__ret = \ 448 __env_set_state(env, &(ip), THREAD_ACTIVE)) != 0) \ 449 return (__ret); \ 450 } \ 451} while (0) 452 453#define FAILCHK_THREAD(env, ip) do { \ 454 if ((ip) != NULL) \ 455 (ip)->dbth_state = THREAD_FAILCHK; \ 456} while (0) 457 458#define ENV_GET_THREAD_INFO(env, ip) ENV_ENTER(env, ip) 459 460#ifdef DIAGNOSTIC 461#define ENV_LEAVE(env, ip) do { \ 462 if ((ip) != NULL) { \ 463 DB_ASSERT(env, ((ip)->dbth_state == THREAD_ACTIVE || \ 464 (ip)->dbth_state == THREAD_FAILCHK)); \ 465 (ip)->dbth_state = THREAD_OUT; \ 466 } \ 467} while (0) 468#else 469#define ENV_LEAVE(env, ip) do { \ 470 if ((ip) != NULL) \ 471 (ip)->dbth_state = THREAD_OUT; \ 472} while (0) 473#endif 474#ifdef DIAGNOSTIC 475#define CHECK_THREAD(env) do { \ 476 if ((env)->thr_hashtab != NULL) \ 477 (void)__env_set_state(env, NULL, THREAD_VERIFY); \ 478} while (0) 479#ifdef HAVE_STATISTICS 480#define CHECK_MTX_THREAD(env, mtx) do { \ 481 if (mtx->alloc_id != MTX_MUTEX_REGION && \ 482 mtx->alloc_id != MTX_ENV_REGION && \ 483 mtx->alloc_id != MTX_APPLICATION) \ 484 CHECK_THREAD(env); \ 485} while (0) 486#else 487#define CHECK_MTX_THREAD(env, mtx) 488#endif 489#else 490#define CHECK_THREAD(env) 491#define CHECK_MTX_THREAD(env, mtx) 492#endif 493 494typedef enum { 495 THREAD_SLOT_NOT_IN_USE=0, 496 THREAD_OUT, 497 THREAD_ACTIVE, 498 THREAD_BLOCKED, 499 THREAD_BLOCKED_DEAD, 500 THREAD_FAILCHK, 501 THREAD_VERIFY 502} 
DB_THREAD_STATE; 503 504typedef struct __pin_list { 505 roff_t b_ref; /* offset to buffer. */ 506 int region; /* region containing buffer. */ 507} PIN_LIST; 508#define PINMAX 4 509 510struct __db_thread_info { 511 pid_t dbth_pid; 512 db_threadid_t dbth_tid; 513 DB_THREAD_STATE dbth_state; 514 SH_TAILQ_ENTRY dbth_links; 515 /* 516 * The following fields track which buffers this thread of 517 * control has pinned in the mpool buffer cache. 518 */ 519 u_int16_t dbth_pincount; /* Number of pins for this thread. */ 520 u_int16_t dbth_pinmax; /* Number of slots allocated. */ 521 roff_t dbth_pinlist; /* List of pins. */ 522 PIN_LIST dbth_pinarray[PINMAX]; /* Initial array of slots. */ 523}; 524 525typedef struct __env_thread_info { 526 u_int32_t thr_count; 527 u_int32_t thr_max; 528 u_int32_t thr_nbucket; 529 roff_t thr_hashoff; 530} THREAD_INFO; 531 532#define DB_EVENT(env, e, einfo) do { \ 533 DB_ENV *__dbenv = (env)->dbenv; \ 534 if (__dbenv->db_event_func != NULL) \ 535 __dbenv->db_event_func(__dbenv, e, einfo); \ 536} while (0) 537 538typedef struct __flag_map { 539 u_int32_t inflag, outflag; 540} FLAG_MAP; 541 542/* 543 * Internal database environment structure. 544 * 545 * This is the private database environment handle. The public environment 546 * handle is the DB_ENV structure. The library owns this structure, the user 547 * owns the DB_ENV structure. The reason there are two structures is because 548 * the user's configuration outlives any particular DB_ENV->open call, and 549 * separate structures allows us to easily discard internal information without 550 * discarding the user's configuration. 551 */ 552struct __env { 553 DB_ENV *dbenv; /* Linked DB_ENV structure */ 554 555 /* 556 * The ENV structure can be used concurrently, so field access is 557 * protected. 
558 */ 559 db_mutex_t mtx_env; /* ENV structure mutex */ 560 561 /* 562 * Some fields are included in the ENV structure rather than in the 563 * DB_ENV structure because they are only set as arguments to the 564 * DB_ENV->open method. In other words, because of the historic API, 565 * not for any rational reason. 566 * 567 * Arguments to DB_ENV->open. 568 */ 569 char *db_home; /* Database home */ 570 u_int32_t open_flags; /* Flags */ 571 int db_mode; /* Default open permissions */ 572 573 pid_t pid_cache; /* Cached process ID */ 574 575 DB_FH *lockfhp; /* fcntl(2) locking file handle */ 576 577 DB_LOCKER *env_lref; /* Locker in non-threaded handles */ 578 579 DB_DISTAB recover_dtab; /* Dispatch table for recover funcs */ 580 581 int dir_mode; /* Intermediate directory perms. */ 582 583 /* Thread tracking */ 584 u_int32_t thr_nbucket; /* Number of hash buckets */ 585 DB_HASHTAB *thr_hashtab; /* Hash table of DB_THREAD_INFO */ 586 587 /* Mutex allocation */ 588 struct { 589 int alloc_id; /* Allocation ID argument */ 590 u_int32_t flags; /* Flags argument */ 591 } *mutex_iq; /* Initial mutexes queue */ 592 u_int mutex_iq_next; /* Count of initial mutexes */ 593 u_int mutex_iq_max; /* Maximum initial mutexes */ 594 595 /* 596 * List of open DB handles for this ENV, used for cursor 597 * adjustment. Must be protected for multi-threaded support. 598 */ 599 db_mutex_t mtx_dblist; 600 int db_ref; /* DB handle reference count */ 601 TAILQ_HEAD(__dblist, __db) dblist; 602 603 /* 604 * List of open file handles for this ENV. Must be protected 605 * for multi-threaded support. 
606 */ 607 TAILQ_HEAD(__fdlist, __fh_t) fdlist; 608 609 db_mutex_t mtx_mt; /* Mersenne Twister mutex */ 610 int mti; /* Mersenne Twister index */ 611 u_long *mt; /* Mersenne Twister state vector */ 612 613 DB_CIPHER *crypto_handle; /* Crypto handle */ 614 DB_LOCKTAB *lk_handle; /* Lock handle */ 615 DB_LOG *lg_handle; /* Log handle */ 616 DB_MPOOL *mp_handle; /* Mpool handle */ 617 DB_MUTEXMGR *mutex_handle; /* Mutex handle */ 618 DB_REP *rep_handle; /* Replication handle */ 619 DB_TXNMGR *tx_handle; /* Txn handle */ 620 621 /* Application callback to copy data to/from a custom data source */ 622#define DB_USERCOPY_GETDATA 0x0001 623#define DB_USERCOPY_SETDATA 0x0002 624 int (*dbt_usercopy) 625 __P((DBT *, u_int32_t, void *, u_int32_t, u_int32_t)); 626 627 REGINFO *reginfo; /* REGINFO structure reference */ 628 629#define DB_TEST_ELECTINIT 1 /* after __rep_elect_init */ 630#define DB_TEST_ELECTVOTE1 2 /* after sending VOTE1 */ 631#define DB_TEST_POSTDESTROY 3 /* after destroy op */ 632#define DB_TEST_POSTLOG 4 /* after logging all pages */ 633#define DB_TEST_POSTLOGMETA 5 /* after logging meta in btree */ 634#define DB_TEST_POSTOPEN 6 /* after __os_open */ 635#define DB_TEST_POSTSYNC 7 /* after syncing the log */ 636#define DB_TEST_PREDESTROY 8 /* before destroy op */ 637#define DB_TEST_PREOPEN 9 /* before __os_open */ 638#define DB_TEST_SUBDB_LOCKS 10 /* subdb locking tests */ 639 int test_abort; /* Abort value for testing */ 640 int test_check; /* Checkpoint value for testing */ 641 int test_copy; /* Copy value for testing */ 642 643#define ENV_CDB 0x00000001 /* DB_INIT_CDB */ 644#define ENV_DBLOCAL 0x00000002 /* Environment for a private DB */ 645#define ENV_LITTLEENDIAN 0x00000004 /* Little endian system. 
*/ 646#define ENV_LOCKDOWN 0x00000008 /* DB_LOCKDOWN set */ 647#define ENV_NO_OUTPUT_SET 0x00000010 /* No output channel set */ 648#define ENV_OPEN_CALLED 0x00000020 /* DB_ENV->open called */ 649#define ENV_PRIVATE 0x00000040 /* DB_PRIVATE set */ 650#define ENV_RECOVER_FATAL 0x00000080 /* Doing fatal recovery in env */ 651#define ENV_REF_COUNTED 0x00000100 /* Region references this handle */ 652#define ENV_SYSTEM_MEM 0x00000200 /* DB_SYSTEM_MEM set */ 653#define ENV_THREAD 0x00000400 /* DB_THREAD set */ 654 u_int32_t flags; 655}; 656 657/******************************************************* 658 * Database Access Methods. 659 *******************************************************/ 660/* 661 * DB_IS_THREADED -- 662 * The database handle is free-threaded (was opened with DB_THREAD). 663 */ 664#define DB_IS_THREADED(dbp) \ 665 ((dbp)->mutex != MUTEX_INVALID) 666 667/* Initialization methods are often illegal before/after open is called. */ 668#define DB_ILLEGAL_AFTER_OPEN(dbp, name) \ 669 if (F_ISSET((dbp), DB_AM_OPEN_CALLED)) \ 670 return (__db_mi_open((dbp)->env, name, 1)); 671#define DB_ILLEGAL_BEFORE_OPEN(dbp, name) \ 672 if (!F_ISSET((dbp), DB_AM_OPEN_CALLED)) \ 673 return (__db_mi_open((dbp)->env, name, 0)); 674/* Some initialization methods are illegal if environment isn't local. */ 675#define DB_ILLEGAL_IN_ENV(dbp, name) \ 676 if (!F_ISSET((dbp)->env, ENV_DBLOCAL)) \ 677 return (__db_mi_env((dbp)->env, name)); 678#define DB_ILLEGAL_METHOD(dbp, flags) { \ 679 int __ret; \ 680 if ((__ret = __dbh_am_chk(dbp, flags)) != 0) \ 681 return (__ret); \ 682} 683 684/* 685 * Common DBC->internal fields. Each access method adds additional fields 686 * to this list, but the initial fields are common. 687 */ 688#define __DBC_INTERNAL \ 689 DBC *opd; /* Off-page duplicate cursor. */\ 690 DBC *pdbc; /* Pointer to parent cursor. */ \ 691 \ 692 void *page; /* Referenced page. */ \ 693 u_int32_t part; /* Partition number. */ \ 694 db_pgno_t root; /* Tree root. 
*/ \ 695 db_pgno_t pgno; /* Referenced page number. */ \ 696 db_indx_t indx; /* Referenced key item index. */\ 697 \ 698 /* Streaming -- cache last position. */ \ 699 db_pgno_t stream_start_pgno; /* Last start pgno. */ \ 700 u_int32_t stream_off; /* Current offset. */ \ 701 db_pgno_t stream_curr_pgno; /* Current overflow page. */ \ 702 \ 703 DB_LOCK lock; /* Cursor lock. */ \ 704 db_lockmode_t lock_mode; /* Lock mode. */ 705 706struct __dbc_internal { 707 __DBC_INTERNAL 708}; 709 710/* Actions that __db_master_update can take. */ 711typedef enum { MU_REMOVE, MU_RENAME, MU_OPEN } mu_action; 712 713/* 714 * Access-method-common macro for determining whether a cursor 715 * has been initialized. 716 */ 717#ifdef HAVE_PARTITION 718#define IS_INITIALIZED(dbc) (DB_IS_PARTITIONED((dbc)->dbp) ? \ 719 ((PART_CURSOR *)(dbc)->internal)->sub_cursor != NULL && \ 720 ((PART_CURSOR *)(dbc)->internal)->sub_cursor-> \ 721 internal->pgno != PGNO_INVALID : \ 722 (dbc)->internal->pgno != PGNO_INVALID) 723#else 724#define IS_INITIALIZED(dbc) ((dbc)->internal->pgno != PGNO_INVALID) 725#endif 726 727/* Free the callback-allocated buffer, if necessary, hanging off of a DBT. */ 728#define FREE_IF_NEEDED(env, dbt) \ 729 if (F_ISSET((dbt), DB_DBT_APPMALLOC)) { \ 730 __os_ufree((env), (dbt)->data); \ 731 F_CLR((dbt), DB_DBT_APPMALLOC); \ 732 } 733 734/* 735 * Use memory belonging to object "owner" to return the results of 736 * any no-DBT-flag get ops on cursor "dbc". 737 */ 738#define SET_RET_MEM(dbc, owner) \ 739 do { \ 740 (dbc)->rskey = &(owner)->my_rskey; \ 741 (dbc)->rkey = &(owner)->my_rkey; \ 742 (dbc)->rdata = &(owner)->my_rdata; \ 743 } while (0) 744 745/* Use the return-data memory src is currently set to use in dest as well. */ 746#define COPY_RET_MEM(src, dest) \ 747 do { \ 748 (dest)->rskey = (src)->rskey; \ 749 (dest)->rkey = (src)->rkey; \ 750 (dest)->rdata = (src)->rdata; \ 751 } while (0) 752 753/* Reset the returned-memory pointers to their defaults. 
 */
#define	RESET_RET_MEM(dbc)						\
	do {								\
		(dbc)->rskey = &(dbc)->my_rskey;			\
		(dbc)->rkey = &(dbc)->my_rkey;				\
		(dbc)->rdata = &(dbc)->my_rdata;			\
	} while (0)

/*******************************************************
 * Mpool.
 *******************************************************/
/*
 * File types for DB access methods.  Negative numbers are reserved to DB.
 */
#define	DB_FTYPE_SET		-1		/* Call pgin/pgout functions. */
#define	DB_FTYPE_NOTSET		 0		/* Don't call... */
#define	DB_LSN_OFF_NOTSET	-1		/* Not yet set. */
#define	DB_CLEARLEN_NOTSET	UINT32_MAX	/* Not yet set. */

/* Structure used as the DB pgin/pgout pgcookie. */
typedef struct __dbpginfo {
	size_t	db_pagesize;		/* Underlying page size. */
	u_int32_t flags;		/* Some DB_AM flags needed. */
	DBTYPE  type;			/* DB type */
} DB_PGINFO;

/*******************************************************
 * Log.
 *******************************************************/
/*
 * The LSN macros in this group take an LSN structure (an lvalue for the
 * setters), not a pointer to one.
 */
/* Initialize an LSN to 'zero'. */
#define	ZERO_LSN(LSN) do {						\
	(LSN).file = 0;							\
	(LSN).offset = 0;						\
} while (0)
#define	IS_ZERO_LSN(LSN)	((LSN).file == 0 && (LSN).offset == 0)

/* The 'init' LSN is file 1, offset 0. */
#define	IS_INIT_LSN(LSN)	((LSN).file == 1 && (LSN).offset == 0)
#define	INIT_LSN(LSN)		do {					\
	(LSN).file = 1;							\
	(LSN).offset = 0;						\
} while (0)

/* The 'max' LSN has both fields set to UINT32_MAX. */
#define	MAX_LSN(LSN) do {						\
	(LSN).file = UINT32_MAX;					\
	(LSN).offset = UINT32_MAX;					\
} while (0)
#define	IS_MAX_LSN(LSN) \
	((LSN).file == UINT32_MAX && (LSN).offset == UINT32_MAX)

/* If logging is turned off, smash the lsn (to file 0, offset 1). */
#define	LSN_NOT_LOGGED(LSN) do {					\
	(LSN).file = 0;							\
	(LSN).offset = 1;						\
} while (0)
#define	IS_NOT_LOGGED_LSN(LSN) \
	((LSN).file == 0 && (LSN).offset == 1)

/*
 * LOG_COMPARE -- compare two LSNs.
 *
 * Takes pointers to the two LSNs.  Compares the file fields first, then the
 * offsets; evaluates to -1, 0 or 1 as lsn0 is less than, equal to or greater
 * than lsn1.  Both arguments are evaluated more than once.
 */

#define	LOG_COMPARE(lsn0, lsn1)						\
	((lsn0)->file != (lsn1)->file ?					\
	((lsn0)->file < (lsn1)->file ? -1 : 1) :			\
	((lsn0)->offset != (lsn1)->offset ?				\
	((lsn0)->offset < (lsn1)->offset ? -1 : 1) : 0))

/*******************************************************
 * Txn.
 *******************************************************/
/* The txn-based tests below all treat a NULL transaction as "false"/0. */
#define	DB_NONBLOCK(C)	((C)->txn != NULL && F_ISSET((C)->txn, TXN_NOWAIT))
#define	NOWAIT_FLAG(txn) \
	((txn) != NULL && F_ISSET((txn), TXN_NOWAIT) ? DB_LOCK_NOWAIT : 0)
#define	IS_REAL_TXN(txn)						\
	((txn) != NULL && !F_ISSET(txn, TXN_CDSGROUP))
#define	IS_SUBTRANSACTION(txn)						\
	((txn) != NULL && (txn)->parent != NULL)

/*******************************************************
 * Crypto.
 *******************************************************/
#define	DB_IV_BYTES     16		/* Bytes per IV */
#define	DB_MAC_KEY	20		/* Bytes per MAC checksum */

/*******************************************************
 * Compression
 *******************************************************/
#define	CMP_INT_SPARE_VAL	0xFC	/* Smallest byte value that the integer
					   compression algorithm doesn't use */

/*******************************************************
 * Secondaries over RPC.
 *******************************************************/
#ifdef CONFIG_TEST
/*
 * These are flags passed to DB->associate calls by the Tcl API if running
 * over RPC.  The RPC server will mask out these flags before making the real
 * DB->associate call.
 *
 * These flags must coexist with the valid flags to DB->associate (currently
 * DB_AUTO_COMMIT and DB_CREATE).  DB_AUTO_COMMIT is in the group of
 * high-order shared flags (0xff000000), and DB_CREATE is in the low-order
 * group (0x00000fff), so we pick a range in between.
 *
 * The values below are consecutive numbers within the masked field, not
 * independent bits.
 */
#define	DB_RPC2ND_MASK		0x00f00000 /* Reserved bits. */

#define	DB_RPC2ND_REVERSEDATA	0x00100000 /* callback_n(0) _s_reversedata. */
#define	DB_RPC2ND_NOOP		0x00200000 /* callback_n(1) _s_noop */
#define	DB_RPC2ND_CONCATKEYDATA	0x00300000 /* callback_n(2) _s_concatkeydata */
#define	DB_RPC2ND_CONCATDATAKEY 0x00400000 /* callback_n(3) _s_concatdatakey */
#define	DB_RPC2ND_REVERSECONCAT	0x00500000 /* callback_n(4) _s_reverseconcat */
#define	DB_RPC2ND_TRUNCDATA	0x00600000 /* callback_n(5) _s_truncdata */
#define	DB_RPC2ND_CONSTANT	0x00700000 /* callback_n(6) _s_constant */
#define	DB_RPC2ND_GETZIP	0x00800000 /* sj_getzip */
#define	DB_RPC2ND_GETNAME	0x00900000 /* sj_getname */
#endif

#if defined(__cplusplus)
}
#endif

/*******************************************************
 * Remaining general DB includes.
 *
 * The @db_int_def@ line is a placeholder to be substituted when this
 * template is turned into a header.
 *******************************************************/
@db_int_def@

#include "dbinc/globals.h"
#include "dbinc/clock.h"
#include "dbinc/debug.h"
#include "dbinc/region.h"
#include "dbinc_auto/env_ext.h"
#include "dbinc/mutex.h"
#ifdef HAVE_REPLICATION_THREADS
#include "dbinc/repmgr.h"
#endif
#include "dbinc/rep.h"
#include "dbinc/os.h"
#include "dbinc_auto/clib_ext.h"
#include "dbinc_auto/common_ext.h"

/*******************************************************
 * Remaining Log.
 * These need to be defined after the general includes
 * because they need rep.h from above.
 *******************************************************/
/*
 * Test if the environment is currently logging changes.  If we're in recovery
 * or we're a replication client, we don't need to log changes because they're
 * already in the log, even though we have a fully functional log system.
 */
#define	DBENV_LOGGING(env)						\
	(LOGGING_ON(env) && !IS_REP_CLIENT(env) && (!IS_RECOVERING(env)))

/*
 * Test if we need to log a change.  By default, we don't log operations without
 * associated transactions, unless DIAGNOSTIC, DEBUG_ROP or DEBUG_WOP are on.
 * This is because we want to get log records for read/write operations, and, if
 * we are trying to debug something, more information is always better.
 *
 * The DBC_RECOVER flag is set when we're in abort, as well as during recovery;
 * thus DBC_LOGGING may be false for a particular dbc even when DBENV_LOGGING
 * is true.
 *
 * We explicitly use LOGGING_ON/IS_REP_CLIENT here because we don't want to pull
 * in the log headers, which IS_RECOVERING (and thus DBENV_LOGGING) rely on, and
 * because DBC_RECOVER should be set anytime IS_RECOVERING would be true.
 *
 * If we're not in recovery (master - doing an abort or a client applying
 * a txn), then a client's only path through here is on an internal
 * operation, and a master's only path through here is a transactional
 * operation.  Detect if either is not the case.
 *
 * In DIAGNOSTIC, DEBUG_ROP or DEBUG_WOP builds the decision is delegated to
 * __dbc_logging().  Otherwise it is the inline test below: the cursor has a
 * transaction, logging is configured, the cursor is not flagged DBC_RECOVER,
 * and the environment is not a replication client.
 */
#if defined(DIAGNOSTIC) || defined(DEBUG_ROP)  || defined(DEBUG_WOP)
#define	DBC_LOGGING(dbc)	__dbc_logging(dbc)
#else
#define	DBC_LOGGING(dbc)						\
	((dbc)->txn != NULL && LOGGING_ON((dbc)->env) &&		\
	    !F_ISSET((dbc), DBC_RECOVER) && !IS_REP_CLIENT((dbc)->env))
#endif

#endif /* !_DB_INT_H_ */