1/*
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996-2009 Oracle.  All rights reserved.
5 *
6 * $Id$
7 *
8 * db.h include file layout:
9 *	General.
10 *	Database Environment.
11 *	Locking subsystem.
12 *	Logging subsystem.
13 *	Shared buffer cache (mpool) subsystem.
14 *	Transaction subsystem.
15 *	Access methods.
16 *	Access method cursors.
17 *	Dbm/Ndbm, Hsearch historic interfaces.
18 */
19
20#ifndef _DB_H_
21#define	_DB_H_
22
23#ifndef	__NO_SYSTEM_INCLUDES
24#include <sys/types.h>
25@inttypes_h_decl@
26@stdint_h_decl@
27@stddef_h_decl@
28#include <stdio.h>
29@unistd_h_decl@
30@thread_h_decl@
31#endif
32
33@platform_header@
34#if defined(__cplusplus)
35extern "C" {
36#endif
37
38@DB_CONST@
39@DB_PROTO1@
40@DB_PROTO2@
41
42/*
43 * Berkeley DB version information.
44 */
45#define	DB_VERSION_MAJOR	@DB_VERSION_MAJOR@
46#define	DB_VERSION_MINOR	@DB_VERSION_MINOR@
47#define	DB_VERSION_PATCH	@DB_VERSION_PATCH@
48#define	DB_VERSION_STRING	@DB_VERSION_STRING@
49
50/*
51 * !!!
52 * Berkeley DB uses specifically sized types.  If they're not provided by
53 * the system, typedef them here.
54 *
55 * We protect them against multiple inclusion using __BIT_TYPES_DEFINED__,
56 * as does BIND and Kerberos, since we don't know for sure what #include
57 * files the user is using.
58 *
59 * !!!
60 * We also provide the standard u_int, u_long etc., if they're not provided
61 * by the system.
62 */
63#ifndef	__BIT_TYPES_DEFINED__
64#define	__BIT_TYPES_DEFINED__
65@u_int8_decl@
66@int16_decl@
67@u_int16_decl@
68@int32_decl@
69@u_int32_decl@
70@int64_decl@
71@u_int64_decl@
72#endif
73
74@u_char_decl@
75@u_int_decl@
76@u_long_decl@
77@u_short_decl@
78
79/*
80 * Missing ANSI types.
81 *
82 * uintmax_t --
83 * Largest unsigned type, used to align structures in memory.  We don't store
84 * floating point types in structures, so integral types should be sufficient
85 * (and we don't have to worry about systems that store floats in other than
86 * power-of-2 numbers of bytes).  Additionally this fixes compilers that rewrite
87 * structure assignments and ANSI C memcpy calls to be in-line instructions
88 * that happen to require alignment.
89 *
90 * uintptr_t --
91 * Unsigned type that's the same size as a pointer.  There are places where
92 * DB modifies pointers by discarding the bottom bits to guarantee alignment.
93 * We can't use uintmax_t, it may be larger than the pointer, and compilers
94 * get upset about that.  So far we haven't run on any machine where there's
95 * no unsigned type the same size as a pointer -- here's hoping.
96 */
97@uintmax_t_decl@
98@uintptr_t_decl@
99
100@FILE_t_decl@
101@off_t_decl@
102@pid_t_decl@
103@size_t_decl@
104@ssize_t_decl@
105@time_t_decl@
106
107/*
108 * Sequences are only available on machines with 64-bit integral types.
109 */
110@db_seq_decl@
111
112/* Thread and process identification. */
113@db_threadid_t_decl@
114
/*
 * Basic types that are exported or quasi-exported, each followed by the
 * limit (if any) that goes with it.
 */
typedef	u_int32_t	db_pgno_t;	/* Type of a page number. */
typedef	u_int16_t	db_indx_t;	/* Type of a page offset. */
#define	DB_MAX_PAGES	0xffffffff	/* >= number of pages in a file. */

typedef	u_int32_t	db_recno_t;	/* Type of a record number. */
#define	DB_MAX_RECORDS	0xffffffff	/* >= number of records in a tree. */

typedef	u_int32_t	db_timeout_t;	/* Type of a timeout. */
124
/*
 * roff_t --
 *	A region offset: the difference between a pointer into a region and
 *	that region's base address.  In a private environment both addresses
 *	come from malloc, and nothing can be assumed about what malloc
 *	returns, so the type has to be able to hold the difference between
 *	two arbitrary pointers.
 */
typedef	uintptr_t	roff_t;
133
/*
 * Forward structure declarations, so we can declare pointers and
 * applications can get type checking.
 *
 * Each typedef names a structure tag; at file scope that is enough to
 * declare the (incomplete) structure type as well, so no separate
 * "struct tag;" line is needed.
 */
typedef struct __db			DB;
typedef struct __db_bt_stat		DB_BTREE_STAT;
typedef struct __db_cipher		DB_CIPHER;
typedef struct __db_compact		DB_COMPACT;
typedef struct __db_dbt			DBT;
typedef struct __db_distab		DB_DISTAB;
typedef struct __db_env			DB_ENV;
typedef struct __db_h_stat		DB_HASH_STAT;
typedef struct __db_ilock		DB_LOCK_ILOCK;
typedef struct __db_lock_hstat		DB_LOCK_HSTAT;
typedef struct __db_lock_pstat		DB_LOCK_PSTAT;
typedef struct __db_lock_stat		DB_LOCK_STAT;
typedef struct __db_lock_u		DB_LOCK;
typedef struct __db_locker		DB_LOCKER;
typedef struct __db_lockreq		DB_LOCKREQ;
typedef struct __db_locktab		DB_LOCKTAB;
typedef struct __db_log			DB_LOG;
typedef struct __db_log_cursor		DB_LOGC;
typedef struct __db_log_stat		DB_LOG_STAT;
typedef struct __db_lsn			DB_LSN;
typedef struct __db_mpool		DB_MPOOL;
typedef struct __db_mpool_fstat		DB_MPOOL_FSTAT;
typedef struct __db_mpool_stat		DB_MPOOL_STAT;
typedef struct __db_mpoolfile		DB_MPOOLFILE;
typedef struct __db_mutex_stat		DB_MUTEX_STAT;
typedef struct __db_mutex_t		DB_MUTEX;
typedef struct __db_mutexmgr		DB_MUTEXMGR;
typedef struct __db_preplist		DB_PREPLIST;
typedef struct __db_qam_stat		DB_QUEUE_STAT;
typedef struct __db_rep			DB_REP;
typedef struct __db_rep_stat		DB_REP_STAT;
typedef struct __db_repmgr_site		DB_REPMGR_SITE;
typedef struct __db_repmgr_stat		DB_REPMGR_STAT;
typedef struct __db_seq_record		DB_SEQ_RECORD;
typedef struct __db_seq_stat		DB_SEQUENCE_STAT;
typedef struct __db_sequence		DB_SEQUENCE;
typedef struct __db_thread_info		DB_THREAD_INFO;
typedef struct __db_txn			DB_TXN;
typedef struct __db_txn_active		DB_TXN_ACTIVE;
typedef struct __db_txn_stat		DB_TXN_STAT;
typedef struct __db_txnmgr		DB_TXNMGR;
typedef struct __dbc			DBC;
typedef struct __dbc_internal		DBC_INTERNAL;
typedef struct __env			ENV;
typedef struct __fh_t			DB_FH;
typedef struct __fname			FNAME;
typedef struct __key_range		DB_KEY_RANGE;
typedef struct __mpoolfile		MPOOLFILE;
186
/*
 * Compatibility names.  The Berkeley DB API flags are automatically
 * generated; the flag names below are no longer used, but are retained
 * for compatibility reasons.
 */
#define	DB_DEGREE_2		DB_READ_COMMITTED
#define	DB_DIRTY_READ		DB_READ_UNCOMMITTED
#define	DB_JOINENV		0x0
194
/*
 * Key/data structure -- a Data-Base Thang.
 *
 * Describes a single key or data item: a pointer to the bytes, their
 * length, and the lengths/offset and flags used for user-supplied buffers
 * and partial get/put.
 */
struct __db_dbt {
	void	 *data;			/* Key/data */
	u_int32_t size;			/* key/data length */

	u_int32_t ulen;			/* RO: length of user buffer. */
	u_int32_t dlen;			/* RO: get/put record length. */
	u_int32_t doff;			/* RO: get/put record offset. */

	/* NOTE(review): presumably an opaque application pointer -- confirm. */
	void *app_data;

	/*
	 * Bit values for the flags field.  They are preprocessor macros, so
	 * they are visible file-wide even though they are written here.
	 *
	 * NOTE(review): DB_DBT_BULK and DB_DBT_DUPOK carry the same
	 * description; one of them (probably DB_DBT_BULK's) looks like a
	 * copy-and-paste leftover -- confirm the intended meaning.
	 */
#define	DB_DBT_APPMALLOC	0x001	/* Callback allocated memory. */
#define	DB_DBT_BULK		0x002	/* Internal: Insert if duplicate. */
#define	DB_DBT_DUPOK		0x004	/* Internal: Insert if duplicate. */
#define	DB_DBT_ISSET		0x008	/* Lower level calls set value. */
#define	DB_DBT_MALLOC		0x010	/* Return in malloc'd memory. */
#define	DB_DBT_MULTIPLE		0x020	/* References multiple records. */
#define	DB_DBT_PARTIAL		0x040	/* Partial put/get. */
#define	DB_DBT_REALLOC		0x080	/* Return in realloc'd memory. */
#define	DB_DBT_STREAMING	0x100	/* Internal: DBT is being streamed. */
#define	DB_DBT_USERCOPY		0x200	/* Use the user-supplied callback. */
#define	DB_DBT_USERMEM		0x400	/* Return in user's memory. */
	u_int32_t flags;
};
219
220/*******************************************************
221 * Mutexes.
222 *******************************************************/
typedef	u_int32_t	db_mutex_t;	/* Mutexes are named by a 32-bit value. */
224
/*
 * Mutex statistics structure.
 *
 * The trailing group of fields is compiled out entirely when
 * __TEST_DB_NO_STATISTICS is defined, so the structure's size depends on
 * that macro.
 */
struct __db_mutex_stat {
	/* The following fields are maintained in the region's copy. */
	u_int32_t st_mutex_align;	/* Mutex alignment */
	u_int32_t st_mutex_tas_spins;	/* Mutex test-and-set spins */
	u_int32_t st_mutex_cnt;		/* Mutex count */
	u_int32_t st_mutex_free;	/* Available mutexes */
	u_int32_t st_mutex_inuse;	/* Mutexes in use */
	u_int32_t st_mutex_inuse_max;	/* Maximum mutexes ever in use */

	/* The following fields are filled-in from other places. */
#ifndef __TEST_DB_NO_STATISTICS
	uintmax_t st_region_wait;	/* Region lock granted after wait. */
	uintmax_t st_region_nowait;	/* Region lock granted without wait. */
	roff_t	  st_regsize;		/* Region size. */
#endif
};
241
/* Length of the buffer passed to DB_ENV->thread_id_string(). */
#define	DB_THREADID_STRLEN		128
244
245/*******************************************************
246 * Locking.
247 *******************************************************/
#define	DB_LOCKVERSION		1

#define	DB_FILE_ID_LEN		20	/* Length of a unique file ID. */
251
/*
 * Deadlock detector modes.  One of these is stored in the DB_ENV structure
 * to configure the locking subsystem.
 */
#define	DB_LOCK_NORUN		0
#define	DB_LOCK_DEFAULT		1	/* Use the default policy. */
#define	DB_LOCK_EXPIRE		2	/* Expire locks only; no detection. */
#define	DB_LOCK_MAXLOCKS	3	/* Pick the locker with max locks. */
#define	DB_LOCK_MAXWRITE	4	/* Pick the locker with max writelocks. */
#define	DB_LOCK_MINLOCKS	5	/* Pick the locker with min locks. */
#define	DB_LOCK_MINWRITE	6	/* Pick the locker with min writelocks. */
#define	DB_LOCK_OLDEST		7	/* Pick the oldest locker. */
#define	DB_LOCK_RANDOM		8	/* Pick a random locker. */
#define	DB_LOCK_YOUNGEST	9	/* Pick the youngest locker. */
266
/*
 * Simple R/W lock modes and for multi-granularity intention locking.
 *
 * !!!
 * These values are NOT random, as they are used as an index into the lock
 * conflicts arrays, i.e., DB_LOCK_IWRITE must be == 4, and DB_LOCK_IREAD
 * must be == 5.
 */
typedef enum {
	DB_LOCK_NG			= 0,	/* Not granted. */
	DB_LOCK_READ			= 1,	/* Shared (read). */
	DB_LOCK_WRITE			= 2,	/* Exclusive (write). */
	DB_LOCK_WAIT			= 3,	/* Wait for event. */
	DB_LOCK_IWRITE			= 4,	/* Intent exclusive (write). */
	DB_LOCK_IREAD			= 5,	/* Intent to share (read). */
	DB_LOCK_IWR			= 6,	/* Intent to read and write. */
	DB_LOCK_READ_UNCOMMITTED	= 7,	/* Degree 1 isolation. */
	DB_LOCK_WWRITE			= 8	/* Was Written. */
} db_lockmode_t;
286
/*
 * Lock request types.
 */
typedef enum {
	DB_LOCK_DUMP		= 0,	/* Display the held locks. */
	DB_LOCK_GET		= 1,	/* Get the lock. */
	DB_LOCK_GET_TIMEOUT	= 2,	/* Get the lock, with a timeout. */
	DB_LOCK_INHERIT		= 3,	/* Pass locks to the parent. */
	DB_LOCK_PUT		= 4,	/* Release the lock. */
	DB_LOCK_PUT_ALL		= 5,	/* Release the locker's locks. */
	DB_LOCK_PUT_OBJ		= 6,	/* Release the locker's locks on obj. */
	DB_LOCK_PUT_READ	= 7,	/* Release the locker's read locks. */
	DB_LOCK_TIMEOUT		= 8,	/* Force a txn to timeout. */
	DB_LOCK_TRADE		= 9,	/* Trade locker ids on a lock. */
	DB_LOCK_UPGRADE_WRITE	= 10	/* Upgrade writes for dirty reads. */
} db_lockop_t;
303
/*
 * Lock status values.
 */
typedef enum {
	DB_LSTAT_ABORTED	= 1,	/* Lock belongs to an aborted txn. */
	DB_LSTAT_EXPIRED	= 2,	/* Lock has expired. */
	DB_LSTAT_FREE		= 3,	/* Lock is unallocated. */
	DB_LSTAT_HELD		= 4,	/* Lock is currently held. */
	/*
	 * Lock was waiting and has been promoted; waiting for the owner
	 * to run and upgrade it to held.
	 */
	DB_LSTAT_PENDING	= 5,
	DB_LSTAT_WAITING	= 6	/* Lock is on the wait queue. */
} db_status_t;
317
/*
 * Lock statistics structure.
 *
 * Every field from st_nlocks onward is compiled out when
 * __TEST_DB_NO_STATISTICS is defined, so the structure's size depends on
 * that macro.
 */
struct __db_lock_stat {
	u_int32_t st_id;		/* Last allocated locker ID. */
	u_int32_t st_cur_maxid;		/* Current maximum unused ID. */
	u_int32_t st_maxlocks;		/* Maximum number of locks in table. */
	u_int32_t st_maxlockers;	/* Maximum num of lockers in table. */
	u_int32_t st_maxobjects;	/* Maximum num of objects in table. */
	u_int32_t st_partitions;	/* number of partitions. */
	int	  st_nmodes;		/* Number of lock modes. */
	u_int32_t st_nlockers;		/* Current number of lockers. */
#ifndef __TEST_DB_NO_STATISTICS
	u_int32_t st_nlocks;		/* Current number of locks. */
	u_int32_t st_maxnlocks;		/* Maximum number of locks so far. */
	u_int32_t st_maxhlocks;		/* Maximum number of locks in any bucket. */
	uintmax_t st_locksteals;	/* Number of lock steals so far. */
	uintmax_t st_maxlsteals;	/* Maximum number steals in any partition. */
	u_int32_t st_maxnlockers;	/* Maximum number of lockers so far. */
	u_int32_t st_nobjects;		/* Current number of objects. */
	u_int32_t st_maxnobjects;	/* Maximum number of objects so far. */
	u_int32_t st_maxhobjects;	/* Maximum number of objects in any bucket. */
	uintmax_t st_objectsteals;	/* Number of objects steals so far. */
	uintmax_t st_maxosteals;	/* Maximum number of steals in any partition. */
	uintmax_t st_nrequests;		/* Number of lock gets. */
	uintmax_t st_nreleases;		/* Number of lock puts. */
	uintmax_t st_nupgrade;		/* Number of lock upgrades. */
	uintmax_t st_ndowngrade;	/* Number of lock downgrades. */
	uintmax_t st_lock_wait;		/* Lock conflicts w/ subsequent wait */
	uintmax_t st_lock_nowait;	/* Lock conflicts w/o subsequent wait */
	uintmax_t st_ndeadlocks;	/* Number of lock deadlocks. */
	db_timeout_t st_locktimeout;	/* Lock timeout. */
	uintmax_t st_nlocktimeouts;	/* Number of lock timeouts. */
	db_timeout_t st_txntimeout;	/* Transaction timeout. */
	uintmax_t st_ntxntimeouts;	/* Number of transaction timeouts. */
	uintmax_t st_part_wait;		/* Partition lock granted after wait. */
	uintmax_t st_part_nowait;	/* Partition lock granted without wait. */
	uintmax_t st_part_max_wait;	/* Max partition lock granted after wait. */
	uintmax_t st_part_max_nowait;	/* Max partition lock granted without wait. */
	uintmax_t st_objs_wait;		/* Object lock granted after wait. */
	uintmax_t st_objs_nowait;	/* Object lock granted without wait. */
	uintmax_t st_lockers_wait;	/* Locker lock granted after wait. */
	uintmax_t st_lockers_nowait;	/* Locker lock granted without wait. */
	uintmax_t st_region_wait;	/* Region lock granted after wait. */
	uintmax_t st_region_nowait;	/* Region lock granted without wait. */
	u_int32_t st_hash_len;		/* Max length of bucket. */
	roff_t	  st_regsize;		/* Region size. */
#endif
};
365
/*
 * DB_LOCK_HSTAT --
 *	Lock statistics counters.
 *	NOTE(review): presumably kept per hash bucket (the "h" in the name
 *	and st_hash_len suggest so) -- confirm against the lock region code.
 */
struct __db_lock_hstat {
	uintmax_t st_nrequests;		/* Number of lock gets. */
	uintmax_t st_nreleases;		/* Number of lock puts. */
	uintmax_t st_nupgrade;		/* Number of lock upgrades. */
	uintmax_t st_ndowngrade;	/* Number of lock downgrades. */
	u_int32_t st_nlocks;		/* Current number of locks. */
	u_int32_t st_maxnlocks;		/* Maximum number of locks so far. */
	u_int32_t st_nobjects;		/* Current number of objects. */
	u_int32_t st_maxnobjects;	/* Maximum number of objects so far. */
	uintmax_t st_lock_wait;		/* Lock conflicts w/ subsequent wait */
	uintmax_t st_lock_nowait;	/* Lock conflicts w/o subsequent wait */
	uintmax_t st_nlocktimeouts;	/* Number of lock timeouts. */
	uintmax_t st_ntxntimeouts;	/* Number of transaction timeouts. */
	u_int32_t st_hash_len;		/* Max length of bucket. */
};
381
/*
 * DB_LOCK_PSTAT --
 *	Lock and object counts together with steal counters.
 *	NOTE(review): presumably kept per lock partition (the "p" in the
 *	name suggests so) -- confirm against the lock region code.
 */
struct __db_lock_pstat {
	u_int32_t st_nlocks;		/* Current number of locks. */
	u_int32_t st_maxnlocks;		/* Maximum number of locks so far. */
	u_int32_t st_nobjects;		/* Current number of objects. */
	u_int32_t st_maxnobjects;	/* Maximum number of objects so far. */
	uintmax_t st_locksteals;	/* Number of lock steals so far. */
	uintmax_t st_objectsteals;	/* Number of objects steals so far. */
};
390
/*
 * DB_LOCK_ILOCK --
 *	Internal DB access method lock.
 *
 *	Identifies what is locked by page number, file ID and lock type.
 */
struct __db_ilock {
	db_pgno_t pgno;			/* Page being locked. */
	u_int8_t fileid[DB_FILE_ID_LEN];/* File id. */
	/* NOTE(review): presumably the valid values of type -- confirm. */
#define	DB_HANDLE_LOCK	1
#define	DB_RECORD_LOCK	2
#define	DB_PAGE_LOCK	3
	u_int32_t type;			/* Type of lock. */
};
403
/*
 * DB_LOCK --
 *	The structure is allocated by the caller and filled in during a
 *	lock_get request (or a lock_vec/DB_LOCK_GET).
 *
 *	The lock is referred to by its offset within the region (a roff_t)
 *	rather than by pointer.
 */
struct __db_lock_u {
	roff_t		off;		/* Offset of the lock in the region */
	u_int32_t	ndx;		/* Index of the object referenced by
					 * this lock; used for locking. */
	u_int32_t	gen;		/* Generation number of this lock. */
	db_lockmode_t	mode;		/* mode of this lock. */
};
416
/*
 * Lock request structure.
 *
 * Describes one lock operation: what to do (op), the mode and timeout
 * requested, and the object to lock.  The lock member holds the lock
 * returned; it is embedded by value, not by pointer.
 */
struct __db_lockreq {
	db_lockop_t	 op;		/* Operation. */
	db_lockmode_t	 mode;		/* Requested mode. */
	db_timeout_t	 timeout;	/* Time to expire lock. */
	DBT		*obj;		/* Object being locked. */
	DB_LOCK		 lock;		/* Lock returned. */
};
425
426/*******************************************************
427 * Logging.
428 *******************************************************/
#define	DB_LOGVERSION		16	/* The current log version. */
#define	DB_LOGVERSION_LATCHING	15	/* The log version using latching. */
#define	DB_LOGCHKSUM		12	/* Check sum headers. */
#define	DB_LOGOLDVER		8	/* The oldest log version supported. */
#define	DB_LOGMAGIC		0x040988
434
/*
 * DB_LSN --
 *	Log sequence number.
 *
 * A DB_LSN has two parts, a fileid which identifies a specific file, and an
 * offset within that file.  The fileid is an unsigned 4-byte quantity that
 * uniquely identifies a file within the log directory -- currently a simple
 * counter inside the log.  The offset is also an unsigned 4-byte value.  The
 * log manager guarantees the offset is never more than 4 bytes by switching
 * to a new log file before the maximum length imposed by an unsigned 4-byte
 * offset is reached.
 */
struct __db_lsn {
	u_int32_t	file;		/* File ID. */
	u_int32_t	offset;		/* File offset. */
};
448
/*
 * Log record type limits.
 *
 * DB_user_BEGIN --
 *	The first application-specified log record type.  Application types
 *	must not equal or exceed DB_debug_FLAG.
 *
 * DB_debug_FLAG --
 *	The high bit of the u_int32_t that specifies a log record type.  A
 *	record with this flag set was logged for debugging purposes only,
 *	even if it reflects a database change -- the change was part of a
 *	non-durable transaction.
 */
#define	DB_user_BEGIN			10000
#define	DB_debug_FLAG			0x80000000
460
/*
 * DB_LOGC --
 *	Log cursor.
 *
 *	Holds the cursor position (LSN, record length, previous record's
 *	offset), the read buffer and its bookkeeping, and the function
 *	pointers that make up the handle's public methods.
 */
struct __db_log_cursor {
	ENV	 *env;			/* Environment */

	DB_FH	 *fhp;			/* File handle. */
	DB_LSN	  lsn;			/* Cursor: LSN */
	u_int32_t len;			/* Cursor: record length */
	u_int32_t prev;			/* Cursor: previous record's offset */

	DBT	  dbt;			/* Return DBT. */
	DB_LSN    p_lsn;		/* Persist LSN. */
	u_int32_t p_version;		/* Persist version. */

	u_int8_t *bp;			/* Allocated read buffer. */
	u_int32_t bp_size;		/* Read buffer length in bytes. */
	u_int32_t bp_rlen;		/* Read buffer valid data length. */
	DB_LSN	  bp_lsn;		/* Read buffer first byte LSN. */

	u_int32_t bp_maxrec;		/* Max record length in the log file. */

	/*
	 * Public methods.  The BEGIN/END marker comments look like
	 * delimiters for an automated tool; NOTE(review): confirm before
	 * rewording or moving them.
	 */
	/* DB_LOGC PUBLIC HANDLE LIST BEGIN */
	int (*close) __P((DB_LOGC *, u_int32_t));
	int (*get) __P((DB_LOGC *, DB_LSN *, DBT *, u_int32_t));
	int (*version) __P((DB_LOGC *, u_int32_t *, u_int32_t));
	/* DB_LOGC PUBLIC HANDLE LIST END */

	/* Bit values for the flags field. */
#define	DB_LOG_DISK		0x01	/* Log record came from disk. */
#define	DB_LOG_LOCKED		0x02	/* Log region already locked */
#define	DB_LOG_SILENT_ERR	0x04	/* Turn-off error messages. */
	u_int32_t flags;
};
495
/*
 * Log statistics structure.
 *
 * Every field from st_record onward is compiled out when
 * __TEST_DB_NO_STATISTICS is defined, so the structure's size depends on
 * that macro.
 */
struct __db_log_stat {
	u_int32_t st_magic;		/* Log file magic number. */
	u_int32_t st_version;		/* Log file version number. */
	int	  st_mode;		/* Log file permissions mode. */
	u_int32_t st_lg_bsize;		/* Log buffer size. */
	u_int32_t st_lg_size;		/* Log file size. */
	u_int32_t st_wc_bytes;		/* Bytes to log since checkpoint. */
	u_int32_t st_wc_mbytes;		/* Megabytes to log since checkpoint. */
#ifndef __TEST_DB_NO_STATISTICS
	uintmax_t st_record;		/* Records entered into the log. */
	u_int32_t st_w_bytes;		/* Bytes to log. */
	u_int32_t st_w_mbytes;		/* Megabytes to log. */
	uintmax_t st_wcount;		/* Total I/O writes to the log. */
	uintmax_t st_wcount_fill;	/* Overflow writes to the log. */
	uintmax_t st_rcount;		/* Total I/O reads from the log. */
	uintmax_t st_scount;		/* Total syncs to the log. */
	uintmax_t st_region_wait;	/* Region lock granted after wait. */
	uintmax_t st_region_nowait;	/* Region lock granted without wait. */
	u_int32_t st_cur_file;		/* Current log file number. */
	u_int32_t st_cur_offset;	/* Current log file offset. */
	u_int32_t st_disk_file;		/* Known on disk log file number. */
	u_int32_t st_disk_offset;	/* Known on disk log file offset. */
	u_int32_t st_maxcommitperflush;	/* Max number of commits in a flush. */
	u_int32_t st_mincommitperflush;	/* Min number of commits in a flush. */
	roff_t	  st_regsize;		/* Region size. */
#endif
};
524
/*
 * We need to record the first log record of a transaction.  For user
 * defined logging this macro returns the place to put that information,
 * if it is needed, in rlsnp; otherwise it leaves it unchanged.  We also
 * need to track the last record of the transaction; this returns the
 * place to put that info.
 */
/*
 * Calls the transaction handle's set_txn_lsnp method, passing the handle
 * itself followed by blsnp and llsnp.  The txn argument is evaluated twice.
 */
#define	DB_SET_TXN_LSNP(txn, blsnp, llsnp)				\
	((txn)->set_txn_lsnp((txn), (blsnp), (llsnp)))
534
535/*******************************************************
536 * Shared buffer cache (mpool).
537 *******************************************************/
/*
 * Cache priority values, as used by DB_MPOOLFILE->put and
 * DB_MPOOLFILE->set_priority.
 */
typedef enum {
	DB_PRIORITY_UNCHANGED	= 0,
	DB_PRIORITY_VERY_LOW	= 1,
	DB_PRIORITY_LOW		= 2,
	DB_PRIORITY_DEFAULT	= 3,
	DB_PRIORITY_HIGH	= 4,
	DB_PRIORITY_VERY_HIGH	= 5
} DB_CACHE_PRIORITY;
547
/*
 * DB_MPOOLFILE --
 *	Per-process DB_MPOOLFILE information.
 *
 *	The structure falls into three parts: fields protected by the region
 *	lock (ref, pinref, q), per-handle configuration that is not thread
 *	protected, and the function pointers that make up the handle's
 *	public methods.
 */
struct __db_mpoolfile {
	DB_FH	  *fhp;			/* Underlying file handle. */

	/*
	 * !!!
	 * The ref, pinref and q fields are protected by the region lock.
	 */
	u_int32_t  ref;			/* Reference count. */

	u_int32_t pinref;		/* Pinned block reference count. */

	/*
	 * !!!
	 * Explicit representations of structures from queue.h.
	 * TAILQ_ENTRY(__db_mpoolfile) q;
	 */
	struct {
		struct __db_mpoolfile *tqe_next;
		struct __db_mpoolfile **tqe_prev;
	} q;				/* Linked list of DB_MPOOLFILE's. */

	/*
	 * !!!
	 * The rest of the fields (with the exception of the MP_FLUSH flag)
	 * are not thread-protected, even when they may be modified at any
	 * time by the application.  The reason is the DB_MPOOLFILE handle
	 * is single-threaded from the viewpoint of the application, and so
	 * the only fields needing to be thread-protected are those accessed
	 * by checkpoint or sync threads when using DB_MPOOLFILE structures
	 * to flush buffers from the cache.
	 */
	ENV	       *env;		/* Environment */
	MPOOLFILE      *mfp;		/* Underlying MPOOLFILE. */

	u_int32_t	clear_len;	/* Cleared length on created pages. */
	u_int8_t			/* Unique file ID. */
			fileid[DB_FILE_ID_LEN];
	int		ftype;		/* File type. */
	int32_t		lsn_offset;	/* LSN offset in page. */
	u_int32_t	gbytes, bytes;	/* Maximum file size. */
	DBT	       *pgcookie;	/* Byte-string passed to pgin/pgout. */
	int32_t		priority;	/* Cache priority. */

	void	       *addr;		/* Address of mmap'd region. */
	size_t		len;		/* Length of mmap'd region. */

	u_int32_t	config_flags;	/* Flags to DB_MPOOLFILE->set_flags. */

	/*
	 * Public methods.  The BEGIN/END marker comments look like
	 * delimiters for an automated tool; NOTE(review): confirm before
	 * rewording or moving them.
	 */
	/* DB_MPOOLFILE PUBLIC HANDLE LIST BEGIN */
	int (*close) __P((DB_MPOOLFILE *, u_int32_t));
	int (*get)
	    __P((DB_MPOOLFILE *, db_pgno_t *, DB_TXN *, u_int32_t, void *));
	int (*get_clear_len) __P((DB_MPOOLFILE *, u_int32_t *));
	int (*get_fileid) __P((DB_MPOOLFILE *, u_int8_t *));
	int (*get_flags) __P((DB_MPOOLFILE *, u_int32_t *));
	int (*get_ftype) __P((DB_MPOOLFILE *, int *));
	int (*get_last_pgno) __P((DB_MPOOLFILE *, db_pgno_t *));
	int (*get_lsn_offset) __P((DB_MPOOLFILE *, int32_t *));
	int (*get_maxsize) __P((DB_MPOOLFILE *, u_int32_t *, u_int32_t *));
	int (*get_pgcookie) __P((DB_MPOOLFILE *, DBT *));
	int (*get_priority) __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY *));
	int (*open) __P((DB_MPOOLFILE *, const char *, u_int32_t, int, size_t));
	int (*put) __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t));
	int (*set_clear_len) __P((DB_MPOOLFILE *, u_int32_t));
	int (*set_fileid) __P((DB_MPOOLFILE *, u_int8_t *));
	int (*set_flags) __P((DB_MPOOLFILE *, u_int32_t, int));
	int (*set_ftype) __P((DB_MPOOLFILE *, int));
	int (*set_lsn_offset) __P((DB_MPOOLFILE *, int32_t));
	int (*set_maxsize) __P((DB_MPOOLFILE *, u_int32_t, u_int32_t));
	int (*set_pgcookie) __P((DB_MPOOLFILE *, DBT *));
	int (*set_priority) __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY));
	int (*sync) __P((DB_MPOOLFILE *));
	/* DB_MPOOLFILE PUBLIC HANDLE LIST END */

	/*
	 * MP_FILEID_SET, MP_OPEN_CALLED and MP_READONLY do not need to be
	 * thread protected because they are initialized before the file is
	 * linked onto the per-process lists, and never modified.
	 *
	 * MP_FLUSH is thread protected because it is potentially read/set by
	 * multiple threads of control.
	 */
#define	MP_FILEID_SET	0x001		/* Application supplied a file ID. */
#define	MP_FLUSH	0x002		/* Was opened to flush a buffer. */
#define	MP_MULTIVERSION	0x004		/* Opened for multiversion access. */
#define	MP_OPEN_CALLED	0x008		/* File opened. */
#define	MP_READONLY	0x010		/* File is readonly. */
#define	MP_DUMMY	0x020		/* File is dummy for __memp_fput. */
	u_int32_t  flags;
};
639
/*
 * Mpool statistics structure.
 *
 * Every field from st_map onward is compiled out when
 * __TEST_DB_NO_STATISTICS is defined, so the structure's size depends on
 * that macro.
 */
struct __db_mpool_stat {
	u_int32_t st_gbytes;		/* Total cache size: GB. */
	u_int32_t st_bytes;		/* Total cache size: B. */
	u_int32_t st_ncache;		/* Number of cache regions. */
	u_int32_t st_max_ncache;	/* Maximum number of regions. */
	size_t	  st_mmapsize;		/* Maximum file size for mmap. */
	int	  st_maxopenfd;		/* Maximum number of open fd's. */
	int	  st_maxwrite;		/* Maximum buffers to write. */
	db_timeout_t st_maxwrite_sleep;	/* Sleep after writing max buffers. */
	u_int32_t st_pages;		/* Total number of pages. */
#ifndef __TEST_DB_NO_STATISTICS
	u_int32_t st_map;		/* Pages from mapped files. */
	uintmax_t st_cache_hit;	/* Pages found in the cache. */
	uintmax_t st_cache_miss;	/* Pages not found in the cache. */
	uintmax_t st_page_create;	/* Pages created in the cache. */
	uintmax_t st_page_in;		/* Pages read in. */
	uintmax_t st_page_out;		/* Pages written out. */
	uintmax_t st_ro_evict;		/* Clean pages forced from the cache. */
	uintmax_t st_rw_evict;		/* Dirty pages forced from the cache. */
	uintmax_t st_page_trickle;	/* Pages written by memp_trickle. */
	u_int32_t st_page_clean;	/* Clean pages. */
	u_int32_t st_page_dirty;	/* Dirty pages. */
	u_int32_t st_hash_buckets;	/* Number of hash buckets. */
	u_int32_t st_pagesize;		/* Assumed page size. */
	u_int32_t st_hash_searches;	/* Total hash chain searches. */
	u_int32_t st_hash_longest;	/* Longest hash chain searched. */
	uintmax_t st_hash_examined;	/* Total hash entries searched. */
	uintmax_t st_hash_nowait;	/* Hash lock granted with nowait. */
	uintmax_t st_hash_wait;		/* Hash lock granted after wait. */
	uintmax_t st_hash_max_nowait;	/* Max hash lock granted with nowait. */
	uintmax_t st_hash_max_wait;	/* Max hash lock granted after wait. */
	uintmax_t st_region_nowait;	/* Region lock granted with nowait. */
	uintmax_t st_region_wait;	/* Region lock granted after wait. */
	uintmax_t st_mvcc_frozen;	/* Buffers frozen. */
	uintmax_t st_mvcc_thawed;	/* Buffers thawed. */
	uintmax_t st_mvcc_freed;	/* Frozen buffers freed. */
	uintmax_t st_alloc;		/* Number of page allocations. */
	uintmax_t st_alloc_buckets;	/* Buckets checked during allocation. */
	uintmax_t st_alloc_max_buckets;/* Max checked during allocation. */
	uintmax_t st_alloc_pages;	/* Pages checked during allocation. */
	uintmax_t st_alloc_max_pages;	/* Max checked during allocation. */
	uintmax_t st_io_wait;		/* Thread waited on buffer I/O. */
	uintmax_t st_sync_interrupted;	/* Number of times sync interrupted. */
	roff_t	  st_regsize;		/* Region size. */
#endif
};
687
/*
 * Mpool file statistics structure.
 *
 * Identifies a file by name and carries its page size.  The page counters
 * that follow are compiled out when __TEST_DB_NO_STATISTICS is defined, so
 * the structure's size depends on that macro.
 */
struct __db_mpool_fstat {
	char *file_name;		/* File name. */
	u_int32_t st_pagesize;		/* Page size. */
#ifndef __TEST_DB_NO_STATISTICS
	u_int32_t st_map;		/* Pages from mapped files. */
	uintmax_t st_cache_hit;	/* Pages found in the cache. */
	uintmax_t st_cache_miss;	/* Pages not found in the cache. */
	uintmax_t st_page_create;	/* Pages created in the cache. */
	uintmax_t st_page_in;		/* Pages read in. */
	uintmax_t st_page_out;		/* Pages written out. */
#endif
};
701
702/*******************************************************
703 * Transactions and recovery.
704 *******************************************************/
#define	DB_TXNVERSION		1
706
/*
 * Recovery operations.  The numbering is explicit and skips the value 2;
 * keep each constant at the value given here.
 */
typedef enum {
	DB_TXN_ABORT		= 0,	/* Public. */
	DB_TXN_APPLY		= 1,	/* Public. */
	DB_TXN_BACKWARD_ROLL	= 3,	/* Public. */
	DB_TXN_FORWARD_ROLL	= 4,	/* Public. */
	DB_TXN_OPENFILES	= 5,	/* Internal. */
	DB_TXN_POPENFILES	= 6,	/* Internal. */
	DB_TXN_PRINT		= 7	/* Public. */
} db_recops;
716
/*
 * BACKWARD_ALLOC is used during the forward pass to pick up any aborted
 * allocations for files that were created during the forward pass.
 * The main difference between _ALLOC and _ROLL is that the entry for
 * the file does not exist during the rollforward pass.
 */
/*
 * DB_UNDO(op) is true for the abort and backward-roll operations;
 * DB_REDO(op) is true for the forward-roll and apply operations.
 * The argument may be evaluated twice.
 */
#define	DB_UNDO(op)							\
	(DB_TXN_ABORT == (op) || DB_TXN_BACKWARD_ROLL == (op))
#define	DB_REDO(op)							\
	(DB_TXN_FORWARD_ROLL == (op) || DB_TXN_APPLY == (op))
725
/*
 * Transaction handle.  Holds the transaction's identity and timeouts, its
 * queue linkage, API-private pointers, the handle's method pointers and
 * the TXN_XXX flag bits.
 */
726struct __db_txn {
727	DB_TXNMGR	*mgrp;		/* Pointer to transaction manager. */
728	DB_TXN		*parent;	/* Pointer to transaction's parent. */
729	DB_THREAD_INFO	*thread_info;	/* Pointer to thread information. */
730
731	u_int32_t	txnid;		/* Unique transaction id. */
732	char		*name;		/* Transaction name. */
733	DB_LOCKER	*locker;	/* Locker for this txn. */
734
735	void		*td;		/* Detail structure within region. */
736	db_timeout_t	lock_timeout;	/* Timeout for locks for this txn. */
737	db_timeout_t	expire;		/* Time transaction expires. */
738	void		*txn_list;	/* Undo information for parent. */
739
740	/*
741	 * !!!
742	 * Explicit representations of structures from queue.h.
743	 * TAILQ_ENTRY(__db_txn) links;
744	 */
745	struct {
746		struct __db_txn *tqe_next;
747		struct __db_txn **tqe_prev;
748	} links;			/* Links transactions off manager. */
749
750	/*
751	 * !!!
752	 * Explicit representations of structures from queue.h.
753	 * TAILQ_HEAD(__kids, __db_txn) kids;
754	 */
755	struct __kids {
756		struct __db_txn *tqh_first;
757		struct __db_txn **tqh_last;
758	} kids;				/* Queue of txns (children, presumably). */
759
760	/*
761	 * !!!
762	 * Explicit representations of structures from queue.h.
763	 * TAILQ_HEAD(__events, __txn_event) events;
764	 */
765	struct {
766		struct __txn_event *tqh_first;
767		struct __txn_event **tqh_last;
768	} events;			/* Links deferred events. */
769
770	/*
771	 * !!!
772	 * Explicit representations of structures from queue.h.
773	 * STAILQ_HEAD(__logrec, __txn_logrec) logs;
774	 */
775	struct {
776		struct __txn_logrec *stqh_first;
777		struct __txn_logrec **stqh_last;
778	} logs;				/* Links in memory log records. */
779
780	/*
781	 * !!!
782	 * Explicit representations of structures from queue.h.
783	 * TAILQ_ENTRY(__db_txn) klinks;
784	 */
785	struct {
786		struct __db_txn *tqe_next;
787		struct __db_txn **tqe_prev;
788	} klinks;			/* Queue entry (presumably for a kids queue). */
789
790	void	*api_internal;		/* C++ API private. */
791	void	*xml_internal;		/* XML API private. */
792
793	u_int32_t	cursors;	/* Number of cursors open for txn */
794
795	/* DB_TXN PUBLIC HANDLE LIST BEGIN */
796	int	  (*abort) __P((DB_TXN *));
797	int	  (*commit) __P((DB_TXN *, u_int32_t));
798	int	  (*discard) __P((DB_TXN *, u_int32_t));
799	int	  (*get_name) __P((DB_TXN *, const char **));
800	u_int32_t (*id) __P((DB_TXN *));
801	int	  (*prepare) __P((DB_TXN *, u_int8_t *));
802	int	  (*set_name) __P((DB_TXN *, const char *));
803	int	  (*set_timeout) __P((DB_TXN *, db_timeout_t, u_int32_t));
804	/* DB_TXN PUBLIC HANDLE LIST END */
805
806	/* DB_TXN PRIVATE HANDLE LIST BEGIN */
807	void	  (*set_txn_lsnp) __P((DB_TXN *txn, DB_LSN **, DB_LSN **));
808	/* DB_TXN PRIVATE HANDLE LIST END */
809
	/* Flag bits; each value below is a distinct bit. */
810#define	TXN_CHILDCOMMIT		0x0001	/* Txn has committed. */
811#define	TXN_CDSGROUP		0x0002	/* CDS group handle. */
812#define	TXN_COMPENSATE		0x0004	/* Compensating transaction. */
813#define	TXN_DEADLOCK		0x0008	/* Txn has deadlocked. */
814#define	TXN_LOCKTIMEOUT		0x0010	/* Txn has a lock timeout. */
815#define	TXN_MALLOC		0x0020	/* Structure allocated by TXN system. */
816#define	TXN_NOSYNC		0x0040	/* Do not sync on prepare and commit. */
817#define	TXN_NOWAIT		0x0080	/* Do not wait on locks. */
818#define	TXN_PRIVATE		0x0100	/* Txn owned by cursor. */
819#define	TXN_READ_COMMITTED	0x0200	/* Txn has degree 2 isolation. */
820#define	TXN_READ_UNCOMMITTED	0x0400	/* Txn has degree 1 isolation. */
821#define	TXN_RESTORED		0x0800	/* Txn has been restored. */
822#define	TXN_SNAPSHOT		0x1000	/* Snapshot Isolation. */
823#define	TXN_SYNC		0x2000	/* Write and sync on prepare/commit. */
824#define	TXN_WRITE_NOSYNC	0x4000	/* Write only on prepare/commit. */
825	u_int32_t	flags;		/* TXN_XXX flag bits, defined above. */
826};
827
/* Mask of the three TXN_XXX flags that describe prepare/commit sync behavior. */
828#define	TXN_SYNC_FLAGS (TXN_SYNC | TXN_NOSYNC | TXN_WRITE_NOSYNC)
829
830/*
831 * Structure used for two phase commit interface.
832 * We set the size of our global transaction id (gid) to be 128 in order
833 * to match that defined by the XA X/Open standard.
834 */
835#define	DB_GID_SIZE	128
836struct __db_preplist {
837	DB_TXN	*txn;			/* Transaction handle. */
838	u_int8_t gid[DB_GID_SIZE];	/* Global transaction ID. */
839};
840
841/*
 * Transaction statistics structure: describes one transaction (IDs, owning
 * process and thread, LSNs, status, global ID and name).
 */
842struct __db_txn_active {
843	u_int32_t txnid;		/* Transaction ID */
844	u_int32_t parentid;		/* Transaction ID of parent */
845	pid_t     pid;			/* Process owning txn ID */
846	db_threadid_t tid;		/* Thread owning txn ID */
847
848	DB_LSN	  lsn;			/* LSN when transaction began */
849
850	DB_LSN	  read_lsn;		/* Read LSN for MVCC */
851	u_int32_t mvcc_ref;		/* MVCC reference count */
852
	/* Transaction status values (see the status member below). */
853#define	TXN_ABORTED		1
854#define	TXN_COMMITTED		2
855#define	TXN_PREPARED		3
856#define	TXN_RUNNING		4
857	u_int32_t status;		/* Status of the transaction */
858
859	u_int8_t  gid[DB_GID_SIZE];	/* Global transaction ID */
860	char	  name[51];		/* 50 bytes of name, nul termination */
861};
862
/*
 * Transaction subsystem statistics.  Only st_nrestores is present when
 * __TEST_DB_NO_STATISTICS is defined.
 */
863struct __db_txn_stat {
864	u_int32_t st_nrestores;		/* number of restored transactions
865					   after recovery. */
866#ifndef __TEST_DB_NO_STATISTICS
867	DB_LSN	  st_last_ckp;		/* lsn of the last checkpoint */
868	time_t	  st_time_ckp;		/* time of last checkpoint */
869	u_int32_t st_last_txnid;	/* last transaction id given out */
870	u_int32_t st_maxtxns;		/* maximum txns possible */
871	uintmax_t st_naborts;		/* number of aborted transactions */
872	uintmax_t st_nbegins;		/* number of begun transactions */
873	uintmax_t st_ncommits;		/* number of committed transactions */
874	u_int32_t st_nactive;		/* number of active transactions */
875	u_int32_t st_nsnapshot;		/* number of snapshot transactions */
876	u_int32_t st_maxnactive;	/* maximum active transactions */
877	u_int32_t st_maxnsnapshot;	/* maximum snapshot transactions */
878	DB_TXN_ACTIVE *st_txnarray;	/* array of active transactions */
879	uintmax_t st_region_wait;	/* Region lock granted after wait. */
880	uintmax_t st_region_nowait;	/* Region lock granted without wait. */
881	roff_t	  st_regsize;		/* Region size. */
882#endif	/* !__TEST_DB_NO_STATISTICS */
883};
884
885/*******************************************************
886 * Replication.
887 *******************************************************/
888/*
 * Special, out-of-band environment IDs.
 *
 * NOTE(review): these negative values are not parenthesized, unlike the
 * (-30xxx) error return codes defined later in this file; consider
 * writing them as (-1) and (-2) for consistency.
 */
889#define	DB_EID_BROADCAST	-1
890#define	DB_EID_INVALID		-2
891
/* Default replication priority. */
892#define	DB_REP_DEFAULT_PRIORITY		100
893
894/* Acknowledgement policies. */
895#define	DB_REPMGR_ACKS_ALL		1
896#define	DB_REPMGR_ACKS_ALL_PEERS	2
897#define	DB_REPMGR_ACKS_NONE		3
898#define	DB_REPMGR_ACKS_ONE		4
899#define	DB_REPMGR_ACKS_ONE_PEER		5
900#define	DB_REPMGR_ACKS_QUORUM		6
901
902/* Replication timeout configuration values. */
903#define	DB_REP_ACK_TIMEOUT		1	/* RepMgr acknowledgements. */
904#define	DB_REP_CHECKPOINT_DELAY		2	/* Master checkpoint delay. */
905#define	DB_REP_CONNECTION_RETRY		3	/* RepMgr connections. */
906#define	DB_REP_ELECTION_RETRY		4	/* RepMgr elect retries. */
907#define	DB_REP_ELECTION_TIMEOUT		5	/* Rep normal elections. */
908#define	DB_REP_FULL_ELECTION_TIMEOUT	6	/* Rep full elections. */
909#define	DB_REP_HEARTBEAT_MONITOR	7	/* RepMgr client HB monitor. */
910#define	DB_REP_HEARTBEAT_SEND		8	/* RepMgr master send freq. */
911#define	DB_REP_LEASE_TIMEOUT		9	/* Master leases. */
912
913/* Event notification types. */
914#define	DB_EVENT_NO_SUCH_EVENT		 0 /* out-of-band sentinel value */
915#define	DB_EVENT_PANIC			 1
916#define	DB_EVENT_REG_ALIVE		 2
917#define	DB_EVENT_REG_PANIC		 3
918#define	DB_EVENT_REP_CLIENT		 4
919#define	DB_EVENT_REP_ELECTED		 5
920#define	DB_EVENT_REP_MASTER		 6
921#define	DB_EVENT_REP_NEWMASTER		 7
922#define	DB_EVENT_REP_PERM_FAILED	 8
923#define	DB_EVENT_REP_STARTUPDONE	 9
924#define	DB_EVENT_WRITE_FAILED		10
925
926/* Replication Manager site status. */
927struct __db_repmgr_site {
928	int eid;			/* Environment ID of the site. */
929	char *host;			/* Site's host name. */
930	u_int port;			/* Site's port number. */
931
	/* Values for the status member below. */
932#define	DB_REPMGR_CONNECTED	0x01
933#define	DB_REPMGR_DISCONNECTED	0x02
934	u_int32_t status;		/* DB_REPMGR_XXX status value. */
935};
936
937/*
 * Replication statistics.  Only st_log_queued and st_startup_complete are
 * present when __TEST_DB_NO_STATISTICS is defined.
 */
938struct __db_rep_stat {
939	/* !!!
940	 * Many replication statistics fields cannot be protected by a mutex
941	 * without an unacceptable performance penalty, since most message
942	 * processing is done without the need to hold a region-wide lock.
943	 * Fields whose comments end with a '+' may be updated without holding
944	 * the replication or log mutexes (as appropriate), and thus may be
945	 * off somewhat (or, on unreasonable architectures under unlucky
946	 * circumstances, garbaged).
947	 */
948	uintmax_t st_log_queued;	/* Log records currently queued.+ */
949	u_int32_t st_startup_complete;	/* Site completed client sync-up. */
950#ifndef __TEST_DB_NO_STATISTICS
951	u_int32_t st_status;		/* Current replication status. */
952	DB_LSN st_next_lsn;		/* Next LSN to use or expect. */
953	DB_LSN st_waiting_lsn;		/* LSN we're awaiting, if any. */
954	DB_LSN st_max_perm_lsn;		/* Maximum permanent LSN. */
955	db_pgno_t st_next_pg;		/* Next pg we expect. */
956	db_pgno_t st_waiting_pg;	/* pg we're awaiting, if any. */
957
958	u_int32_t st_dupmasters;	/* # of times a duplicate master
959					   condition was detected.+ */
960	int st_env_id;			/* Current environment ID. */
961	u_int32_t st_env_priority;	/* Current environment priority. */
962	uintmax_t st_bulk_fills;	/* Bulk buffer fills. */
963	uintmax_t st_bulk_overflows;	/* Bulk buffer overflows. */
964	uintmax_t st_bulk_records;	/* Bulk records stored. */
965	uintmax_t st_bulk_transfers;	/* Transfers of bulk buffers. */
966	uintmax_t st_client_rerequests;/* Number of forced rerequests. */
967	uintmax_t st_client_svc_req;	/* Number of client service requests
968					   received by this client. */
969	uintmax_t st_client_svc_miss;	/* Number of client service requests
970					   missing on this client. */
971	u_int32_t st_gen;		/* Current generation number. */
972	u_int32_t st_egen;		/* Current election gen number. */
973	uintmax_t st_log_duplicated;	/* Log records received multiply.+ */
974	uintmax_t st_log_queued_max;	/* Max. log records queued at once.+ */
975	uintmax_t st_log_queued_total;	/* Total # of log recs. ever queued.+ */
976	uintmax_t st_log_records;	/* Log records received and put.+ */
977	uintmax_t st_log_requested;	/* Log recs. missed and requested.+ */
978	int st_master;			/* Env. ID of the current master. */
979	uintmax_t st_master_changes;	/* # of times we've switched masters. */
980	uintmax_t st_msgs_badgen;	/* Messages with a bad generation #.+ */
981	uintmax_t st_msgs_processed;	/* Messages received and processed.+ */
982	uintmax_t st_msgs_recover;	/* Messages ignored because this site
983					   was a client in recovery.+ */
984	uintmax_t st_msgs_send_failures;/* # of failed message sends.+ */
985	uintmax_t st_msgs_sent;	/* # of successful message sends.+ */
986	uintmax_t st_newsites;		/* # of NEWSITE msgs. received.+ */
987	u_int32_t st_nsites;		/* Current number of sites we will
988					   assume during elections. */
989	uintmax_t st_nthrottles;	/* # of times we were throttled. */
990	uintmax_t st_outdated;		/* # of times we detected and returned
991					   an OUTDATED condition.+ */
992	uintmax_t st_pg_duplicated;	/* Pages received multiply.+ */
993	uintmax_t st_pg_records;	/* Pages received and stored.+ */
994	uintmax_t st_pg_requested;	/* Pages missed and requested.+ */
995	uintmax_t st_txns_applied;	/* # of transactions applied.+ */
996	uintmax_t st_startsync_delayed;/* # of STARTSYNC msgs delayed.+ */
997
998	/* Elections generally. */
999	uintmax_t st_elections;	/* # of elections held.+ */
1000	uintmax_t st_elections_won;	/* # of elections won by this site.+ */
1001
1002	/* Statistics about an in-progress election. */
1003	int st_election_cur_winner;	/* Current front-runner. */
1004	u_int32_t st_election_gen;	/* Election generation number. */
1005	DB_LSN st_election_lsn;		/* Max. LSN of current winner. */
1006	u_int32_t st_election_nsites;	/* # of "registered voters". */
1007	u_int32_t st_election_nvotes;	/* # of "registered voters" needed. */
1008	u_int32_t st_election_priority;	/* Current election priority. */
1009	int st_election_status;		/* Current election status. */
1010	u_int32_t st_election_tiebreaker;/* Election tiebreaker value. */
1011	u_int32_t st_election_votes;	/* Votes received in this round. */
1012	u_int32_t st_election_sec;	/* Last election time seconds. */
1013	u_int32_t st_election_usec;	/* Last election time useconds. */
1014	u_int32_t st_max_lease_sec;	/* Maximum lease timestamp seconds. */
1015	u_int32_t st_max_lease_usec;	/* Maximum lease timestamp useconds. */
1016
1017	/* Undocumented statistics only used by the test system. */
1018#ifdef	CONFIG_TEST
1019	u_int32_t st_filefail_cleanups;	/* # of FILE_FAIL cleanups done. */
1020#endif	/* CONFIG_TEST */
1021#endif	/* !__TEST_DB_NO_STATISTICS */
1022};
1023
1024/*
 * Replication Manager statistics: counters for insufficiently acknowledged,
 * queued and dropped messages, and for dropped or failed connections.
 */
1025struct __db_repmgr_stat {
1026	uintmax_t st_perm_failed;	/* # of insufficiently ack'ed msgs. */
1027	uintmax_t st_msgs_queued;	/* # msgs queued for network delay. */
1028	uintmax_t st_msgs_dropped;	/* # msgs discarded due to excessive
1029					   queue length. */
1030	uintmax_t st_connection_drop;	/* Existing connections dropped. */
1031	uintmax_t st_connect_fail;	/* Failed new connection attempts. */
1032};
1033
1034/*******************************************************
1035 * Sequences.
1036 *******************************************************/
1037/*
1038 * The storage record for a sequence: version, flags, the current value
 * and the permitted minimum and maximum values.
1039 */
1040struct __db_seq_record {
1041	u_int32_t	seq_version;	/* Version size/number. */
1042	u_int32_t	flags;		/* DB_SEQ_XXX Flags. */
1043	db_seq_t	seq_value;	/* Current value. */
1044	db_seq_t	seq_max;	/* Max permitted. */
1045	db_seq_t	seq_min;	/* Min permitted. */
1046};
1047
1048/*
1049 * Handle for a sequence object.  Holds the backing DB handle, the
 * sequence record and cache state, the key/data DBTs, an API-private
 * pointer and the handle's method pointers.
1050 */
1051struct __db_sequence {
1052	DB		*seq_dbp;	/* DB handle for this sequence. */
1053	db_mutex_t	mtx_seq;	/* Mutex if sequence is threaded. */
1054	DB_SEQ_RECORD	*seq_rp;	/* Pointer to current data. */
1055	DB_SEQ_RECORD	seq_record;	/* Data from DB_SEQUENCE. */
1056	int32_t		seq_cache_size; /* Number of values cached. */
1057	db_seq_t	seq_last_value;	/* Last value cached. */
1058	DBT		seq_key;	/* DBT pointing to sequence key. */
1059	DBT		seq_data;	/* DBT pointing to seq_record. */
1060
1061	/* API-private structure: used by C++ and Java. */
1062	void		*api_internal;
1063
1064	/* DB_SEQUENCE PUBLIC HANDLE LIST BEGIN */
1065	int		(*close) __P((DB_SEQUENCE *, u_int32_t));
1066	int		(*get) __P((DB_SEQUENCE *,
1067			      DB_TXN *, int32_t, db_seq_t *, u_int32_t));
1068	int		(*get_cachesize) __P((DB_SEQUENCE *, int32_t *));
1069	int		(*get_db) __P((DB_SEQUENCE *, DB **));
1070	int		(*get_flags) __P((DB_SEQUENCE *, u_int32_t *));
1071	int		(*get_key) __P((DB_SEQUENCE *, DBT *));
1072	int		(*get_range) __P((DB_SEQUENCE *,
1073			     db_seq_t *, db_seq_t *));
1074	int		(*initial_value) __P((DB_SEQUENCE *, db_seq_t));
1075	int		(*open) __P((DB_SEQUENCE *,
1076			    DB_TXN *, DBT *, u_int32_t));
1077	int		(*remove) __P((DB_SEQUENCE *, DB_TXN *, u_int32_t));
1078	int		(*set_cachesize) __P((DB_SEQUENCE *, int32_t));
1079	int		(*set_flags) __P((DB_SEQUENCE *, u_int32_t));
1080	int		(*set_range) __P((DB_SEQUENCE *, db_seq_t, db_seq_t));
1081	int		(*stat) __P((DB_SEQUENCE *,
1082			    DB_SEQUENCE_STAT **, u_int32_t));
1083	int		(*stat_print) __P((DB_SEQUENCE *, u_int32_t));
1084	/* DB_SEQUENCE PUBLIC HANDLE LIST END */
1085};
1086
/* Sequence statistics: lock wait counters, values, range, cache size, flags. */
1087struct __db_seq_stat {
1088	uintmax_t st_wait;		/* Sequence lock granted after wait. */
1089	uintmax_t st_nowait;		/* Sequence lock granted w/o wait. */
1090	db_seq_t  st_current;		/* Current value in db. */
1091	db_seq_t  st_value;		/* Current cached value. */
1092	db_seq_t  st_last_value;	/* Last cached value. */
1093	db_seq_t  st_min;		/* Minimum value. */
1094	db_seq_t  st_max;		/* Maximum value. */
1095	int32_t   st_cache_size;	/* Cache size. */
1096	u_int32_t st_flags;		/* Flag value. */
1097};
1098
1099/*******************************************************
1100 * Access methods.
1101 *******************************************************/
/* Access method types. */
1102typedef enum {
1103	DB_BTREE=1,			/* Btree access method. */
1104	DB_HASH=2,			/* Hash access method. */
1105	DB_RECNO=3,			/* Recno access method. */
1106	DB_QUEUE=4,			/* Queue access method. */
1107	DB_UNKNOWN=5			/* Figure it out on open. */
1108} DBTYPE;
1109
/*
 * Magic numbers and current/oldest-supported version numbers for the
 * btree, hash and queue access methods, and for sequences.
 */
1110#define	DB_RENAMEMAGIC	0x030800	/* File has been renamed. */
1111
1112#define	DB_BTREEVERSION	9		/* Current btree version. */
1113#define	DB_BTREEOLDVER	8		/* Oldest btree version supported. */
1114#define	DB_BTREEMAGIC	0x053162	/* Btree magic number. */
1115
1116#define	DB_HASHVERSION	9		/* Current hash version. */
1117#define	DB_HASHOLDVER	7		/* Oldest hash version supported. */
1118#define	DB_HASHMAGIC	0x061561	/* Hash magic number. */
1119
1120#define	DB_QAMVERSION	4		/* Current queue version. */
1121#define	DB_QAMOLDVER	3		/* Oldest queue version supported. */
1122#define	DB_QAMMAGIC	0x042253	/* Queue magic number. */
1123
1124#define	DB_SEQUENCE_VERSION 2		/* Current sequence version. */
1125#define	DB_SEQUENCE_OLDVER  1		/* Oldest sequence version supported. */
1126
1127/*
1128 * DB access method and cursor operation values.  Each value is an operation
1129 * code to which additional bit flags are added.  The trailing comment on
 * each value names the methods it is used with.
1130 */
1131#define	DB_AFTER		 1	/* Dbc.put */
1132#define	DB_APPEND		 2	/* Db.put */
1133#define	DB_BEFORE		 3	/* Dbc.put */
1134#define	DB_CONSUME		 4	/* Db.get */
1135#define	DB_CONSUME_WAIT		 5	/* Db.get */
1136#define	DB_CURRENT		 6	/* Dbc.get, Dbc.put, DbLogc.get */
1137#define	DB_FIRST		 7	/* Dbc.get, DbLogc.get */
1138#define	DB_GET_BOTH		 8	/* Db.get, Dbc.get */
1139#define	DB_GET_BOTHC		 9	/* Dbc.get (internal) */
1140#define	DB_GET_BOTH_RANGE	10	/* Db.get, Dbc.get */
1141#define	DB_GET_RECNO		11	/* Dbc.get */
1142#define	DB_JOIN_ITEM		12	/* Dbc.get; don't do primary lookup */
1143#define	DB_KEYFIRST		13	/* Dbc.put */
1144#define	DB_KEYLAST		14	/* Dbc.put */
1145#define	DB_LAST			15	/* Dbc.get, DbLogc.get */
1146#define	DB_NEXT			16	/* Dbc.get, DbLogc.get */
1147#define	DB_NEXT_DUP		17	/* Dbc.get */
1148#define	DB_NEXT_NODUP		18	/* Dbc.get */
1149#define	DB_NODUPDATA		19	/* Db.put, Dbc.put */
1150#define	DB_NOOVERWRITE		20	/* Db.put */
1151#define	DB_NOSYNC		21	/* Db.close */
1152#define	DB_OVERWRITE_DUP	22	/* Dbc.put, Db.put; no DB_KEYEXIST */
1153#define	DB_POSITION		23	/* Dbc.dup */
1154#define	DB_PREV			24	/* Dbc.get, DbLogc.get */
1155#define	DB_PREV_DUP		25	/* Dbc.get */
1156#define	DB_PREV_NODUP		26	/* Dbc.get */
1157#define	DB_SET			27	/* Dbc.get, DbLogc.get */
1158#define	DB_SET_RANGE		28	/* Dbc.get */
1159#define	DB_SET_RECNO		29	/* Db.get, Dbc.get */
1160#define	DB_UPDATE_SECONDARY	30	/* Dbc.get, Dbc.del (internal) */
1161#define	DB_SET_LTE		31	/* Dbc.get (internal) */
1162#define	DB_GET_BOTH_LTE		32	/* Dbc.get (internal) */
1163
1164/*
 * This has to change when the max opcode hits 255.  The highest opcode
 * defined in the list above is 32.
 */
1165#define	DB_OPFLAGS_MASK	0x000000ff	/* Mask for operations flags. */
1166
1167/*
1168 * DB (user visible) error return codes.
1169 *
1170 * !!!
1171 * We don't want our error returns to conflict with other packages where
1172 * possible, so pick a base error value that's hopefully not common.  We
1173 * document that we own the error name space from -30,800 to -30,999.
1174 */
1175/* DB (public) error return codes: -30999 through -30971. */
1176#define	DB_BUFFER_SMALL		(-30999)/* User memory too small for return. */
1177#define	DB_DONOTINDEX		(-30998)/* "Null" return from 2ndary callbk. */
1178#define	DB_FOREIGN_CONFLICT	(-30997)/* A foreign db constraint triggered. */
1179#define	DB_KEYEMPTY		(-30996)/* Key/data deleted or never created. */
1180#define	DB_KEYEXIST		(-30995)/* The key/data pair already exists. */
1181#define	DB_LOCK_DEADLOCK	(-30994)/* Deadlock. */
1182#define	DB_LOCK_NOTGRANTED	(-30993)/* Lock unavailable. */
1183#define	DB_LOG_BUFFER_FULL	(-30992)/* In-memory log buffer full. */
1184#define	DB_NOSERVER		(-30991)/* Server panic return. */
1185#define	DB_NOSERVER_HOME	(-30990)/* Bad home sent to server. */
1186#define	DB_NOSERVER_ID		(-30989)/* Bad ID sent to server. */
1187#define	DB_NOTFOUND		(-30988)/* Key/data pair not found (EOF). */
1188#define	DB_OLD_VERSION		(-30987)/* Out-of-date version. */
1189#define	DB_PAGE_NOTFOUND	(-30986)/* Requested page not found. */
1190#define	DB_REP_DUPMASTER	(-30985)/* There are two masters. */
1191#define	DB_REP_HANDLE_DEAD	(-30984)/* Rolled back a commit. */
1192#define	DB_REP_HOLDELECTION	(-30983)/* Time to hold an election. */
1193#define	DB_REP_IGNORE		(-30982)/* This msg should be ignored.*/
1194#define	DB_REP_ISPERM		(-30981)/* Cached not written perm written.*/
1195#define	DB_REP_JOIN_FAILURE	(-30980)/* Unable to join replication group. */
1196#define	DB_REP_LEASE_EXPIRED	(-30979)/* Master lease has expired. */
1197#define	DB_REP_LOCKOUT		(-30978)/* API/Replication lockout now. */
1198#define	DB_REP_NEWSITE		(-30977)/* New site entered system. */
1199#define	DB_REP_NOTPERM		(-30976)/* Permanent log record not written. */
1200#define	DB_REP_UNAVAIL		(-30975)/* Site cannot currently be reached. */
1201#define	DB_RUNRECOVERY		(-30974)/* Panic return. */
1202#define	DB_SECONDARY_BAD	(-30973)/* Secondary index corrupt. */
1203#define	DB_VERIFY_BAD		(-30972)/* Verify failed; bad format. */
1204#define	DB_VERSION_MISMATCH	(-30971)/* Environment version mismatch. */
1205
1206/* DB (private) error return codes: -30899 through -30885. */
1207#define	DB_ALREADY_ABORTED	(-30899)
1208#define	DB_DELETED		(-30898)/* Recovery file marked deleted. */
1209#define	DB_EVENT_NOT_HANDLED	(-30897)/* Forward event to application. */
1210#define	DB_NEEDSPLIT		(-30896)/* Page needs to be split. */
1211#define	DB_REP_BULKOVF		(-30895)/* Rep bulk buffer overflow. */
1212#define	DB_REP_EGENCHG		(-30894)/* Egen changed while in election. */
1213#define	DB_REP_LOGREADY		(-30893)/* Rep log ready for recovery. */
1214#define	DB_REP_NEWMASTER	(-30892)/* We have learned of a new master. */
1215#define	DB_REP_PAGEDONE		(-30891)/* This page was already done. */
1216#define	DB_REP_PAGELOCKED	(-30890)/* Page we want is locked. */
1217#define	DB_SURPRISE_KID		(-30889)/* Child commit where parent
1218					   didn't know it was a parent. */
1219#define	DB_SWAPBYTES		(-30888)/* Database needs byte swapping. */
1220#define	DB_TIMEOUT		(-30887)/* Timed out waiting for election. */
1221#define	DB_TXN_CKP		(-30886)/* Encountered ckp record in log. */
1222#define	DB_VERIFY_FATAL		(-30885)/* DB->verify cannot proceed. */
1223
1224/* Database handle. */
1225struct __db {
1226	/*******************************************************
1227	 * Public: owned by the application.
1228	 *******************************************************/
1229	u_int32_t pgsize;		/* Database logical page size. */
1230	DB_CACHE_PRIORITY priority;	/* Database priority in cache. */
1231
1232					/* Callbacks. */
1233	int (*db_append_recno) __P((DB *, DBT *, db_recno_t));
1234	void (*db_feedback) __P((DB *, int, int));
1235	int (*dup_compare) __P((DB *, const DBT *, const DBT *));
1236
1237	void	*app_private;		/* Application-private handle. */
1238
1239	/*******************************************************
1240	 * Private: owned by DB.
1241	 *******************************************************/
1242	DB_ENV	*dbenv;			/* Backing public environment. */
1243	ENV	*env;			/* Backing private environment. */
1244
1245	DBTYPE	 type;			/* DB access method type. */
1246
1247	DB_MPOOLFILE *mpf;		/* Backing buffer pool. */
1248
1249	db_mutex_t mutex;		/* Synchronization for free threading */
1250
1251	char *fname, *dname;		/* File/database passed to DB->open. */
1252	const char *dirname;		/* Direcory of DB file. */
1253	u_int32_t open_flags;		/* Flags passed to DB->open. */
1254
1255	u_int8_t fileid[DB_FILE_ID_LEN];/* File's unique ID for locking. */
1256
1257	u_int32_t adj_fileid;		/* File's unique ID for curs. adj. */
1258
1259#define	DB_LOGFILEID_INVALID	-1
1260	FNAME *log_filename;		/* File's naming info for logging. */
1261
1262	db_pgno_t meta_pgno;		/* Meta page number */
1263	DB_LOCKER *locker;		/* Locker for handle locking. */
1264	DB_LOCKER *cur_locker;		/* Current handle lock holder. */
1265	DB_TXN *cur_txn;		/* Opening transaction. */
1266	DB_LOCKER *associate_locker;	/* Locker for DB->associate call. */
1267	DB_LOCK	 handle_lock;		/* Lock held on this handle. */
1268
1269	u_int	 cl_id;			/* RPC: remote client id. */
1270
1271	time_t	 timestamp;		/* Handle timestamp for replication. */
1272	u_int32_t fid_gen;		/* Rep generation number for fids. */
1273
1274	/*
1275	 * Returned data memory for DB->get() and friends.
1276	 */
1277	DBT	 my_rskey;		/* Secondary key. */
1278	DBT	 my_rkey;		/* [Primary] key. */
1279	DBT	 my_rdata;		/* Data. */
1280
1281	/*
1282	 * !!!
1283	 * Some applications use DB but implement their own locking outside of
1284	 * DB.  If they're using fcntl(2) locking on the underlying database
1285	 * file, and we open and close a file descriptor for that file, we will
1286	 * discard their locks.  The DB_FCNTL_LOCKING flag to DB->open is an
1287	 * undocumented interface to support this usage which leaves any file
1288	 * descriptors we open until DB->close.  This will only work with the
1289	 * DB->open interface and simple caches, e.g., creating a transaction
1290	 * thread may open/close file descriptors this flag doesn't protect.
1291	 * Locking with fcntl(2) on a file that you don't own is a very, very
1292	 * unsafe thing to do.  'Nuff said.
1293	 */
1294	DB_FH	*saved_open_fhp;	/* Saved file handle. */
1295
1296	/*
1297	 * Linked list of DBP's, linked from the ENV, used to keep track
1298	 * of all open db handles for cursor adjustment.
1299	 *
1300	 * !!!
1301	 * Explicit representations of structures from queue.h.
1302	 * TAILQ_ENTRY(__db) dblistlinks;
1303	 */
1304	struct {
1305		struct __db *tqe_next;
1306		struct __db **tqe_prev;
1307	} dblistlinks;
1308
1309	/*
1310	 * Cursor queues.
1311	 *
1312	 * !!!
1313	 * Explicit representations of structures from queue.h.
1314	 * TAILQ_HEAD(__cq_fq, __dbc) free_queue;
1315	 * TAILQ_HEAD(__cq_aq, __dbc) active_queue;
1316	 * TAILQ_HEAD(__cq_jq, __dbc) join_queue;
1317	 */
1318	struct __cq_fq {
1319		struct __dbc *tqh_first;
1320		struct __dbc **tqh_last;
1321	} free_queue;
1322	struct __cq_aq {
1323		struct __dbc *tqh_first;
1324		struct __dbc **tqh_last;
1325	} active_queue;
1326	struct __cq_jq {
1327		struct __dbc *tqh_first;
1328		struct __dbc **tqh_last;
1329	} join_queue;
1330
1331	/*
1332	 * Secondary index support.
1333	 *
1334	 * Linked list of secondary indices -- set in the primary.
1335	 *
1336	 * !!!
1337	 * Explicit representations of structures from queue.h.
1338	 * LIST_HEAD(s_secondaries, __db);
1339	 */
1340	struct {
1341		struct __db *lh_first;
1342	} s_secondaries;
1343
1344	/*
1345	 * List entries for secondaries, and reference count of how many
1346	 * threads are updating this secondary (see Dbc.put).
1347	 *
1348	 * !!!
1349	 * Note that these are synchronized by the primary's mutex, but
1350	 * filled in in the secondaries.
1351	 *
1352	 * !!!
1353	 * Explicit representations of structures from queue.h.
1354	 * LIST_ENTRY(__db) s_links;
1355	 */
1356	struct {
1357		struct __db *le_next;
1358		struct __db **le_prev;
1359	} s_links;
1360	u_int32_t s_refcnt;
1361
1362	/* Secondary callback and free functions -- set in the secondary. */
1363	int	(*s_callback) __P((DB *, const DBT *, const DBT *, DBT *));
1364
1365	/* Reference to primary -- set in the secondary. */
1366	DB	*s_primary;
1367
1368#define	DB_ASSOC_IMMUTABLE_KEY    0x00000001 /* Secondary key is immutable. */
1369
1370	/* Flags passed to associate -- set in the secondary. */
1371	u_int32_t s_assoc_flags;
1372
1373	/*
1374	 * Foreign key support.
1375	 *
1376	 * Linked list of primary dbs -- set in the foreign db
1377	 *
1378	 * !!!
1379	 * Explicit representations of structures from queue.h.
1380	 * LIST_HEAD(f_primaries, __db);
1381	 */
1382	struct {
1383		struct __db_foreign_info *lh_first;
1384	} f_primaries;
1385
1386	/* Reference to foreign -- set in the secondary. */
1387	DB      *s_foreign;
1388
1389	/* API-private structure: used by DB 1.85, C++, Java, Perl and Tcl */
1390	void	*api_internal;
1391
1392	/* Subsystem-private structure. */
1393	void	*bt_internal;		/* Btree/Recno access method. */
1394	void	*h_internal;		/* Hash access method. */
1395	void	*p_internal;		/* Partition informaiton. */
1396	void	*q_internal;		/* Queue access method. */
1397
1398	/* DB PUBLIC HANDLE LIST BEGIN */
1399	int  (*associate) __P((DB *, DB_TXN *, DB *,
1400		int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
1401	int  (*associate_foreign) __P((DB *, DB *,
1402		int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
1403		u_int32_t));
1404	int  (*close) __P((DB *, u_int32_t));
1405	int  (*compact) __P((DB *,
1406		DB_TXN *, DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
1407	int  (*cursor) __P((DB *, DB_TXN *, DBC **, u_int32_t));
1408	int  (*del) __P((DB *, DB_TXN *, DBT *, u_int32_t));
1409	void (*err) __P((DB *, int, const char *, ...));
1410	void (*errx) __P((DB *, const char *, ...));
1411	int  (*exists) __P((DB *, DB_TXN *, DBT *, u_int32_t));
1412	int  (*fd) __P((DB *, int *));
1413	int  (*get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
1414	int  (*get_alloc) __P((DB *, void *(**)(size_t),
1415		void *(**)(void *, size_t), void (**)(void *)));
1416	int  (*get_append_recno) __P((DB *, int (**)(DB *, DBT *, db_recno_t)));
1417	int  (*get_bt_compare)
1418		__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
1419	int  (*get_bt_compress) __P((DB *,
1420		int (**)(DB *, 
1421		const DBT *, const DBT *, const DBT *, const DBT *, DBT *),
1422		int (**)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *)));
1423	int  (*get_bt_minkey) __P((DB *, u_int32_t *));
1424	int  (*get_bt_prefix)
1425		__P((DB *, size_t (**)(DB *, const DBT *, const DBT *)));
1426	int  (*get_byteswapped) __P((DB *, int *));
1427	int  (*get_cachesize) __P((DB *, u_int32_t *, u_int32_t *, int *));
1428	int  (*get_create_dir) __P((DB *, const char **));
1429	int  (*get_dbname) __P((DB *, const char **, const char **));
1430	int  (*get_dup_compare)
1431		__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
1432	int  (*get_encrypt_flags) __P((DB *, u_int32_t *));
1433	DB_ENV *(*get_env) __P((DB *));
1434	void (*get_errcall) __P((DB *,
1435		void (**)(const DB_ENV *, const char *, const char *)));
1436	void (*get_errfile) __P((DB *, FILE **));
1437	void (*get_errpfx) __P((DB *, const char **));
1438	int  (*get_feedback) __P((DB *, void (**)(DB *, int, int)));
1439	int  (*get_flags) __P((DB *, u_int32_t *));
1440	int  (*get_h_compare)
1441		__P((DB *, int (**)(DB *, const DBT *, const DBT *)));
1442	int  (*get_h_ffactor) __P((DB *, u_int32_t *));
1443	int  (*get_h_hash)
1444		__P((DB *, u_int32_t (**)(DB *, const void *, u_int32_t)));
1445	int  (*get_h_nelem) __P((DB *, u_int32_t *));
1446	int  (*get_lorder) __P((DB *, int *));
1447	DB_MPOOLFILE *(*get_mpf) __P((DB *));
1448	void (*get_msgcall) __P((DB *, 
1449	    void (**)(const DB_ENV *, const char *)));
1450	void (*get_msgfile) __P((DB *, FILE **));
1451	int  (*get_multiple) __P((DB *));
1452	int  (*get_open_flags) __P((DB *, u_int32_t *));
1453	int  (*get_pagesize) __P((DB *, u_int32_t *));
1454	int  (*get_partition_callback) __P((DB *,
1455		u_int32_t *, u_int32_t (**)(DB *, DBT *key)));
1456	int  (*get_partition_dirs) __P((DB *, const char ***));
1457	int  (*get_partition_keys) __P((DB *, u_int32_t *, DBT **));
1458	int  (*get_priority) __P((DB *, DB_CACHE_PRIORITY *));
1459	int  (*get_q_extentsize) __P((DB *, u_int32_t *));
1460	int  (*get_re_delim) __P((DB *, int *));
1461	int  (*get_re_len) __P((DB *, u_int32_t *));
1462	int  (*get_re_pad) __P((DB *, int *));
1463	int  (*get_re_source) __P((DB *, const char **));
1464	int  (*get_transactional) __P((DB *));
1465	int  (*get_type) __P((DB *, DBTYPE *));
1466	int  (*join) __P((DB *, DBC **, DBC **, u_int32_t));
1467	int  (*key_range)
1468		__P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t));
1469	int  (*open) __P((DB *,
1470		DB_TXN *, const char *, const char *, DBTYPE, u_int32_t, int));
1471	int  (*pget) __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
1472	int  (*put) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
1473	int  (*remove) __P((DB *, const char *, const char *, u_int32_t));
1474	int  (*rename) __P((DB *,
1475		const char *, const char *, const char *, u_int32_t));
1476	int  (*set_alloc) __P((DB *, void *(*)(size_t),
1477		void *(*)(void *, size_t), void (*)(void *)));
1478	int  (*set_append_recno) __P((DB *, int (*)(DB *, DBT *, db_recno_t)));
1479	int  (*set_bt_compare)
1480		__P((DB *, int (*)(DB *, const DBT *, const DBT *)));
1481	int  (*set_bt_compress) __P((DB *,
1482		int (*)(DB *, const DBT *, const DBT *, const DBT *, const DBT *, DBT *),
1483		int (*)(DB *, const DBT *, const DBT *, DBT *, DBT *, DBT *)));
1484	int  (*set_bt_minkey) __P((DB *, u_int32_t));
1485	int  (*set_bt_prefix)
1486		__P((DB *, size_t (*)(DB *, const DBT *, const DBT *)));
1487	int  (*set_cachesize) __P((DB *, u_int32_t, u_int32_t, int));
1488	int  (*set_create_dir) __P((DB *, const char *));
1489	int  (*set_dup_compare)
1490		__P((DB *, int (*)(DB *, const DBT *, const DBT *)));
1491	int  (*set_encrypt) __P((DB *, const char *, u_int32_t));
1492	void (*set_errcall) __P((DB *,
1493		void (*)(const DB_ENV *, const char *, const char *)));
1494	void (*set_errfile) __P((DB *, FILE *));
1495	void (*set_errpfx) __P((DB *, const char *));
1496	int  (*set_feedback) __P((DB *, void (*)(DB *, int, int)));
1497	int  (*set_flags) __P((DB *, u_int32_t));
1498	int  (*set_h_compare)
1499		__P((DB *, int (*)(DB *, const DBT *, const DBT *)));
1500	int  (*set_h_ffactor) __P((DB *, u_int32_t));
1501	int  (*set_h_hash)
1502		__P((DB *, u_int32_t (*)(DB *, const void *, u_int32_t)));
1503	int  (*set_h_nelem) __P((DB *, u_int32_t));
1504	int  (*set_lorder) __P((DB *, int));
1505	void (*set_msgcall) __P((DB *, void (*)(const DB_ENV *, const char *)));
1506	void (*set_msgfile) __P((DB *, FILE *));
1507	int  (*set_pagesize) __P((DB *, u_int32_t));
1508	int  (*set_paniccall) __P((DB *, void (*)(DB_ENV *, int)));
1509	int  (*set_partition) __P((DB *,
1510		u_int32_t, DBT *, u_int32_t (*)(DB *, DBT *key)));
1511	int  (*set_partition_dirs) __P((DB *, const char **));
1512	int  (*set_priority) __P((DB *, DB_CACHE_PRIORITY));
1513	int  (*set_q_extentsize) __P((DB *, u_int32_t));
1514	int  (*set_re_delim) __P((DB *, int));
1515	int  (*set_re_len) __P((DB *, u_int32_t));
1516	int  (*set_re_pad) __P((DB *, int));
1517	int  (*set_re_source) __P((DB *, const char *));
1518	int  (*sort_multiple) __P((DB *, DBT *, DBT *, u_int32_t));
1519	int  (*stat) __P((DB *, DB_TXN *, void *, u_int32_t));
1520	int  (*stat_print) __P((DB *, u_int32_t));
1521	int  (*sync) __P((DB *, u_int32_t));
1522	int  (*truncate) __P((DB *, DB_TXN *, u_int32_t *, u_int32_t));
1523	int  (*upgrade) __P((DB *, const char *, u_int32_t));
1524	int  (*verify)
1525		__P((DB *, const char *, const char *, FILE *, u_int32_t));
1526	/* DB PUBLIC HANDLE LIST END */
1527
1528	/* DB PRIVATE HANDLE LIST BEGIN */
1529	int  (*dump) __P((DB *, const char *,
1530		int (*)(void *, const void *), void *, int, int));
1531	int  (*db_am_remove) __P((DB *, DB_THREAD_INFO *,
1532		DB_TXN *, const char *, const char *, u_int32_t));
1533	int  (*db_am_rename) __P((DB *, DB_THREAD_INFO *,
1534		DB_TXN *, const char *, const char *, const char *));
1535	/* DB PRIVATE HANDLE LIST END */
1536
1537	/*
1538	 * Never called; these are a place to save function pointers
1539	 * so that we can undo an associate.
1540	 */
1541	int  (*stored_get) __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
1542	int  (*stored_close) __P((DB *, u_int32_t));
1543
1544#define	DB_OK_BTREE	0x01
1545#define	DB_OK_HASH	0x02
1546#define	DB_OK_QUEUE	0x04
1547#define	DB_OK_RECNO	0x08
1548	u_int32_t	am_ok;		/* Legal AM choices. */
1549
1550	/*
	 * This field really ought to be an AM_FLAG, but we have
	 * run out of bits.  If/when we decide to split up
1553	 * the flags, we can incorporate it.
1554	 */
1555	int	 preserve_fid;		/* Do not free fileid on close. */
1556
1557#define	DB_AM_CHKSUM		0x00000001 /* Checksumming */
1558#define	DB_AM_COMPENSATE	0x00000002 /* Created by compensating txn */
1559#define	DB_AM_COMPRESS		0x00000004 /* Compressed BTree */
1560#define	DB_AM_CREATED		0x00000008 /* Database was created upon open */
1561#define	DB_AM_CREATED_MSTR	0x00000010 /* Encompassing file was created */
1562#define	DB_AM_DBM_ERROR		0x00000020 /* Error in DBM/NDBM database */
1563#define	DB_AM_DELIMITER		0x00000040 /* Variable length delimiter set */
1564#define	DB_AM_DISCARD		0x00000080 /* Discard any cached pages */
1565#define	DB_AM_DUP		0x00000100 /* DB_DUP */
1566#define	DB_AM_DUPSORT		0x00000200 /* DB_DUPSORT */
1567#define	DB_AM_ENCRYPT		0x00000400 /* Encryption */
1568#define	DB_AM_FIXEDLEN		0x00000800 /* Fixed-length records */
1569#define	DB_AM_INMEM		0x00001000 /* In-memory; no sync on close */
1570#define	DB_AM_INORDER		0x00002000 /* DB_INORDER */
1571#define	DB_AM_IN_RENAME		0x00004000 /* File is being renamed */
1572#define	DB_AM_NOT_DURABLE	0x00008000 /* Do not log changes */
1573#define	DB_AM_OPEN_CALLED	0x00010000 /* DB->open called */
1574#define	DB_AM_PAD		0x00020000 /* Fixed-length record pad */
1575#define	DB_AM_PGDEF		0x00040000 /* Page size was defaulted */
1576#define	DB_AM_RDONLY		0x00080000 /* Database is readonly */
1577#define	DB_AM_READ_UNCOMMITTED	0x00100000 /* Support degree 1 isolation */
1578#define	DB_AM_RECNUM		0x00200000 /* DB_RECNUM */
1579#define	DB_AM_RECOVER		0x00400000 /* DB opened by recovery routine */
1580#define	DB_AM_RENUMBER		0x00800000 /* DB_RENUMBER */
1581#define	DB_AM_REVSPLITOFF	0x01000000 /* DB_REVSPLITOFF */
1582#define	DB_AM_SECONDARY		0x02000000 /* Database is a secondary index */
1583#define	DB_AM_SNAPSHOT		0x04000000 /* DB_SNAPSHOT */
1584#define	DB_AM_SUBDB		0x08000000 /* Subdatabases supported */
1585#define	DB_AM_SWAP		0x10000000 /* Pages need to be byte-swapped */
1586#define	DB_AM_TXN		0x20000000 /* Opened in a transaction */
1587#define	DB_AM_VERIFYING		0x40000000 /* DB handle is in the verifier */
1588	u_int32_t orig_flags;		   /* Flags at  open, for refresh */
1589	u_int32_t flags;
1590};
1591
1592/*
1593 * Macros for bulk operations.  These are only intended for the C API.
1594 * For C++, use DbMultiple*Iterator or DbMultiple*Builder.
1595 *
1596 * Bulk operations store multiple entries into a single DBT structure. The
1597 * following macros assist with creating and reading these Multiple DBTs.
1598 *
1599 * The basic layout for single data items is:
1600 *
1601 * -------------------------------------------------------------------------
1602 * | data1 | ... | dataN | ..... |-1 | dNLen | dNOff | ... | d1Len | d1Off |
1603 * -------------------------------------------------------------------------
1604 *
1605 * For the DB_MULTIPLE_KEY* macros, the items are in key/data pairs, so data1
1606 * would be a key, and data2 its corresponding value (N is always even).
1607 *
1608 * For the DB_MULTIPLE_RECNO* macros, the record number is stored along with
1609 * the len/off pair in the "header" section, and the list is zero terminated
1610 * (since -1 is a valid record number):
1611 *
1612 * --------------------------------------------------------------------------
1613 * | d1 |..| dN |..| 0 | dNLen | dNOff | recnoN |..| d1Len | d1Off | recno1 |
1614 * --------------------------------------------------------------------------
1615 */
/*
 * DB_MULTIPLE_INIT --
 *	Point "pointer" at the last u_int32_t-sized slot of the buffer
 *	described by "dbt" (data + ulen - sizeof(u_int32_t)), which is where
 *	the trailing offset/length table of a bulk buffer begins.
 */
#define	DB_MULTIPLE_INIT(pointer, dbt)					\
	((pointer) = ((u_int8_t *)(dbt)->data + (dbt)->ulen) -		\
	    sizeof(u_int32_t))
1619
/*
 * DB_MULTIPLE_NEXT --
 *	Fetch the next data item from a bulk buffer.  "pointer" addresses the
 *	current offset slot; the item's length is in the slot just below it.
 *	On the (u_int32_t)-1 terminator both "retdata" and "pointer" are set
 *	to NULL ("retdlen" is left untouched).  An item of length 0 located
 *	at offset 0 is reported with a NULL "retdata".
 */
#define	DB_MULTIPLE_NEXT(pointer, dbt, retdata, retdlen)		\
	do {								\
		u_int32_t *__ent = (u_int32_t *)(pointer);		\
		if (__ent[0] != (u_int32_t)-1) {			\
			retdata = (u_int8_t *)(dbt)->data + __ent[0];	\
			retdlen = __ent[-1];				\
			pointer = __ent - 2;				\
			if (retdlen == 0 &&				\
			    retdata == (u_int8_t *)(dbt)->data)		\
				retdata = NULL;				\
		} else {						\
			retdata = NULL;					\
			pointer = NULL;					\
		}							\
	} while (0)
1634
/*
 * DB_MULTIPLE_KEY_NEXT --
 *	Fetch the next key/data pair from a bulk buffer.  Four consecutive
 *	slots, walking downwards from "pointer", hold key offset, key length,
 *	data offset and data length.  On the (u_int32_t)-1 terminator
 *	"retkey", "retdata" and "pointer" are all set to NULL.
 */
#define	DB_MULTIPLE_KEY_NEXT(pointer, dbt, retkey, retklen, retdata, retdlen) \
	do {								\
		u_int32_t *__ent = (u_int32_t *)(pointer);		\
		if (__ent[0] != (u_int32_t)-1) {			\
			retkey = (u_int8_t *)(dbt)->data + __ent[0];	\
			retklen = __ent[-1];				\
			retdata = (u_int8_t *)(dbt)->data + __ent[-2];	\
			retdlen = __ent[-3];				\
			pointer = __ent - 4;				\
		} else {						\
			retdata = NULL;					\
			retkey = NULL;					\
			pointer = NULL;					\
		}							\
	} while (0)
1650
/*
 * DB_MULTIPLE_RECNO_NEXT --
 *	Fetch the next record-number/data item from a bulk buffer.  Three
 *	consecutive slots, walking downwards from "pointer", hold the record
 *	number, the data offset and the data length.  A record number of 0
 *	terminates the list: "recno" becomes 0 and "retdata" and "pointer"
 *	become NULL.
 */
#define	DB_MULTIPLE_RECNO_NEXT(pointer, dbt, recno, retdata, retdlen)   \
	do {								\
		u_int32_t *__ent = (u_int32_t *)(pointer);		\
		if (__ent[0] != (u_int32_t)0) {				\
			recno = __ent[0];				\
			retdata = (u_int8_t *)(dbt)->data + __ent[-1];	\
			retdlen = __ent[-2];				\
			pointer = __ent - 3;				\
		} else {						\
			recno = 0;					\
			retdata = NULL;					\
			pointer = NULL;					\
		}							\
	} while (0)
1665
/*
 * DB_MULTIPLE_WRITE_INIT --
 *	Prepare "dbt" for building a bulk buffer: set DB_DBT_BULK in its
 *	flags, write the (u_int32_t)-1 terminator into the last u_int32_t
 *	slot of the buffer and leave "pointer" on that slot.
 */
#define	DB_MULTIPLE_WRITE_INIT(pointer, dbt)				\
	do {								\
		u_int8_t *__tail;					\
		(dbt)->flags |= DB_DBT_BULK;				\
		__tail = (u_int8_t *)(dbt)->data + (dbt)->ulen;		\
		__tail -= sizeof(u_int32_t);				\
		pointer = __tail;					\
		*(u_int32_t *)(pointer) = (u_int32_t)-1;		\
	} while (0)
1673
/*
 * DB_MULTIPLE_RESERVE_NEXT --
 *	Reserve "writedlen" bytes for the next data item of a bulk buffer
 *	without copying anything.  The item is placed right after the
 *	previous one (offset 0 for the first item, recognised by "pointer"
 *	still addressing the last slot of the buffer).  On success
 *	"writedata" addresses the reserved bytes, the offset/length slots and
 *	a new (u_int32_t)-1 terminator are written, and "pointer" moves down
 *	two slots.  If the data would run into the slot table, "writedata" is
 *	set to NULL and nothing else changes.
 */
#define	DB_MULTIPLE_RESERVE_NEXT(pointer, dbt, writedata, writedlen)	\
	do {								\
		u_int32_t *__slot = (u_int32_t *)(pointer);		\
		u_int32_t __off;					\
		if ((pointer) == (u_int8_t *)(dbt)->data +		\
		    (dbt)->ulen - sizeof(u_int32_t))			\
			__off = 0;					\
		else							\
			__off = __slot[1] + __slot[2];			\
		if ((u_int8_t *)(dbt)->data + __off + (writedlen) <=	\
		    (u_int8_t *)(__slot - 2)) {				\
			writedata = (u_int8_t *)(dbt)->data + __off;	\
			__slot[0] = __off;				\
			__slot[-1] = (writedlen);			\
			__slot[-2] = (u_int32_t)-1;			\
			pointer = __slot - 2;				\
		} else							\
			writedata = NULL;				\
	} while (0)
1690
/*
 * DB_MULTIPLE_WRITE_NEXT --
 *	Append one data item to a bulk buffer: reserve "writedlen" bytes with
 *	DB_MULTIPLE_RESERVE_NEXT and copy "writedata" into them.  "pointer"
 *	is set to NULL when the item does not fit.
 *
 *	The copy is skipped for zero-length items, so an empty item whose
 *	source pointer is NULL never reaches memcpy (an invalid pointer
 *	argument to memcpy is undefined behavior even when the length is 0).
 *	This is the same guard DB_MULTIPLE_RECNO_WRITE_NEXT applies.
 *
 *	Arguments may be evaluated more than once.
 */
#define DB_MULTIPLE_WRITE_NEXT(pointer, dbt, writedata, writedlen)	\
	do {								\
		void *__destd;						\
		DB_MULTIPLE_RESERVE_NEXT((pointer), (dbt),		\
		    __destd, (writedlen));				\
		if (__destd == NULL)					\
			pointer = NULL;					\
		else if ((writedlen) != 0)				\
			memcpy(__destd, (writedata), (writedlen));	\
	} while (0)
1701
/*
 * DB_MULTIPLE_KEY_RESERVE_NEXT --
 *	Reserve room for the next key/data pair of a bulk buffer without
 *	copying anything.  The key is placed right after the previous item
 *	(offset 0 for the first pair) and the data immediately follows the
 *	key.  On success "writekey" and "writedata" address the reserved
 *	bytes, four slots (key offset, key length, data offset, data length)
 *	plus a new (u_int32_t)-1 terminator are written, and "pointer" moves
 *	down four slots.  If the pair would run into the slot table, both
 *	"writekey" and "writedata" are set to NULL and nothing else changes.
 */
#define	DB_MULTIPLE_KEY_RESERVE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \
	do {								\
		u_int32_t *__slot = (u_int32_t *)(pointer);		\
		u_int32_t __off, __doff;				\
		if ((pointer) == (u_int8_t *)(dbt)->data +		\
		    (dbt)->ulen - sizeof(u_int32_t))			\
			__off = 0;					\
		else							\
			__off = __slot[1] + __slot[2];			\
		if ((u_int8_t *)(dbt)->data + __off + (writeklen) +	\
		    (writedlen) <= (u_int8_t *)(__slot - 4)) {		\
			writekey = (u_int8_t *)(dbt)->data + __off;	\
			__slot[0] = __off;				\
			__slot[-1] = (writeklen);			\
			__doff = __off + (writeklen);			\
			writedata = (u_int8_t *)(dbt)->data + __doff;	\
			__slot[-2] = __doff;				\
			__slot[-3] = (writedlen);			\
			__slot[-4] = (u_int32_t)-1;			\
			pointer = __slot - 4;				\
		} else {						\
			writekey = NULL;				\
			writedata = NULL;				\
		}							\
	} while (0)
1724
/*
 * DB_MULTIPLE_KEY_WRITE_NEXT --
 *	Append one key/data pair to a bulk buffer: reserve room for both with
 *	DB_MULTIPLE_KEY_RESERVE_NEXT, then copy "writekey" and "writedata"
 *	into the reserved bytes.  "pointer" is set to NULL when the pair does
 *	not fit.
 *
 *	Each copy is skipped when its length is 0, so an empty key or data
 *	item whose source pointer is NULL never reaches memcpy (an invalid
 *	pointer argument to memcpy is undefined behavior even when the length
 *	is 0).  This is the same guard DB_MULTIPLE_RECNO_WRITE_NEXT applies.
 *
 *	Arguments may be evaluated more than once.
 */
#define DB_MULTIPLE_KEY_WRITE_NEXT(pointer, dbt, writekey, writeklen, writedata, writedlen) \
	do {								\
		void *__destk, *__destd;				\
		DB_MULTIPLE_KEY_RESERVE_NEXT((pointer), (dbt),		\
		    __destk, (writeklen), __destd, (writedlen));	\
		if (__destk == NULL)					\
			pointer = NULL;					\
		else {							\
			if ((writeklen) != 0)				\
				memcpy(__destk, (writekey), (writeklen));\
			if (__destd != NULL && (writedlen) != 0)	\
				memcpy(__destd, (writedata), (writedlen));\
		}							\
	} while (0)
1738
/*
 * DB_MULTIPLE_RECNO_WRITE_INIT --
 *	Prepare "dbt" for building a record-number bulk buffer: set
 *	DB_DBT_BULK in its flags, write the 0 terminator into the last
 *	u_int32_t slot of the buffer and leave "pointer" on that slot.
 */
#define	DB_MULTIPLE_RECNO_WRITE_INIT(pointer, dbt)			\
	do {								\
		u_int8_t *__tail;					\
		(dbt)->flags |= DB_DBT_BULK;				\
		__tail = (u_int8_t *)(dbt)->data + (dbt)->ulen;		\
		__tail -= sizeof(u_int32_t);				\
		pointer = __tail;					\
		*(u_int32_t *)(pointer) = 0;				\
	} while (0)
1746
/*
 * DB_MULTIPLE_RECNO_RESERVE_NEXT --
 *	Reserve "writedlen" bytes for the next record of a record-number bulk
 *	buffer without copying anything.  The data is placed right after the
 *	previous item (offset 0 for the first one).  On success "writedata"
 *	addresses the reserved bytes, three slots (record number, offset,
 *	length) plus a new 0 terminator are written, and "pointer" moves down
 *	three slots.  If the data would run into the slot table, "writedata"
 *	is set to NULL and nothing else changes.
 */
#define	DB_MULTIPLE_RECNO_RESERVE_NEXT(pointer, dbt, recno, writedata, writedlen) \
	do {								\
		u_int32_t *__slot = (u_int32_t *)(pointer);		\
		u_int32_t __off;					\
		if ((pointer) == (u_int8_t *)(dbt)->data +		\
		    (dbt)->ulen - sizeof(u_int32_t))			\
			__off = 0;					\
		else							\
			__off = __slot[1] + __slot[2];			\
		if (((u_int8_t *)(dbt)->data + __off) + (writedlen) <=	\
		    (u_int8_t *)(__slot - 3)) {				\
			writedata = (u_int8_t *)(dbt)->data + __off;	\
			__slot[0] = (u_int32_t)(recno);			\
			__slot[-1] = __off;				\
			__slot[-2] = (writedlen);			\
			__slot[-3] = 0;					\
			pointer = __slot - 3;				\
		} else							\
			writedata = NULL;				\
	} while (0)
1764
/*
 * DB_MULTIPLE_RECNO_WRITE_NEXT --
 *	Append one record-number/data item to a bulk buffer: reserve room
 *	with DB_MULTIPLE_RECNO_RESERVE_NEXT and, for non-empty items, copy
 *	"writedata" into it.  "pointer" is set to NULL when the item does not
 *	fit.
 */
#define	DB_MULTIPLE_RECNO_WRITE_NEXT(pointer, dbt, recno, writedata, writedlen)\
	do {								\
		void *__dst;						\
		DB_MULTIPLE_RECNO_RESERVE_NEXT((pointer), (dbt),	\
		    (recno), __dst, (writedlen));			\
		if (__dst != NULL) {					\
			if ((writedlen) != 0)				\
				memcpy(__dst, (writedata), (writedlen));\
		} else							\
			pointer = NULL;					\
	} while (0)
1775
1776/*******************************************************
1777 * Access method cursors.
1778 *******************************************************/
/*
 * Cursor handle.
 *
 * Ties a cursor to its backing database and environment handles, the
 * owning thread and transaction, the buffers used to return keys and data,
 * its locking state, and the public, deprecated and private method tables
 * through which it is operated.
 */
struct __dbc {
	DB *dbp;			/* Backing database */
	DB_ENV *dbenv;			/* Backing public environment handle */
	ENV *env;			/* Backing private environment handle */

	DB_THREAD_INFO *thread_info;	/* Thread that owns this cursor. */
	DB_TXN	 *txn;			/* Associated transaction. */
	DB_CACHE_PRIORITY priority;	/* Priority in cache. */

	/*
	 * Active/free cursor queues.
	 *
	 * !!!
	 * Explicit representations of structures from queue.h.
	 * TAILQ_ENTRY(__dbc) links;
	 */
	struct {
		DBC *tqe_next;
		DBC **tqe_prev;
	} links;

	/*
	 * The DBT *'s below are used by the cursor routines to return
	 * data to the user when DBT flags indicate that DB should manage
	 * the returned memory.  They point at a DBT containing the buffer
	 * and length that will be used, and "belonging" to the handle that
	 * should "own" this memory.  This may be a "my_*" field of this
	 * cursor--the default--or it may be the corresponding field of
	 * another cursor, a DB handle, a join cursor, etc.  In general, it
	 * will be whatever handle the user originally used for the current
	 * DB interface call.
	 */
	DBT	 *rskey;		/* Returned secondary key. */
	DBT	 *rkey;			/* Returned [primary] key. */
	DBT	 *rdata;		/* Returned data. */

	DBT	  my_rskey;		/* Space for returned secondary key. */
	DBT	  my_rkey;		/* Space for returned [primary] key. */
	DBT	  my_rdata;		/* Space for returned data. */

	DB_LOCKER *lref;		/* Reference to default locker. */
	DB_LOCKER *locker;		/* Locker for this operation. */
	DBT	  lock_dbt;		/* DBT referencing lock. */
	DB_LOCK_ILOCK lock;		/* Object to be locked. */
	DB_LOCK	  mylock;		/* CDB lock held on this cursor. */

	u_int	  cl_id;		/* Remote client id. */

	DBTYPE	  dbtype;		/* Cursor type. */

	DBC_INTERNAL *internal;		/* Access method private. */

	/* DBC PUBLIC HANDLE LIST BEGIN */
	int (*close) __P((DBC *));
	int (*cmp) __P((DBC *, DBC *, int *, u_int32_t));
	int (*count) __P((DBC *, db_recno_t *, u_int32_t));
	int (*del) __P((DBC *, u_int32_t));
	int (*dup) __P((DBC *, DBC **, u_int32_t));
	int (*get) __P((DBC *, DBT *, DBT *, u_int32_t));
	int (*get_priority) __P((DBC *, DB_CACHE_PRIORITY *));
	int (*pget) __P((DBC *, DBT *, DBT *, DBT *, u_int32_t));
	int (*put) __P((DBC *, DBT *, DBT *, u_int32_t));
	int (*set_priority) __P((DBC *, DB_CACHE_PRIORITY));
	/* DBC PUBLIC HANDLE LIST END */

	/*
	 * The following are the method names deprecated in the 4.6 release.
	 * Each has the same signature as the un-prefixed method above.
	 */
	int (*c_close) __P((DBC *));
	int (*c_count) __P((DBC *, db_recno_t *, u_int32_t));
	int (*c_del) __P((DBC *, u_int32_t));
	int (*c_dup) __P((DBC *, DBC **, u_int32_t));
	int (*c_get) __P((DBC *, DBT *, DBT *, u_int32_t));
	int (*c_pget) __P((DBC *, DBT *, DBT *, DBT *, u_int32_t));
	int (*c_put) __P((DBC *, DBT *, DBT *, u_int32_t));

	/* DBC PRIVATE HANDLE LIST BEGIN */
	int (*am_bulk) __P((DBC *, DBT *, u_int32_t));
	int (*am_close) __P((DBC *, db_pgno_t, int *));
	int (*am_del) __P((DBC *, u_int32_t));
	int (*am_destroy) __P((DBC *));
	int (*am_get) __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
	int (*am_put) __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
	int (*am_writelock) __P((DBC *));
	/* DBC PRIVATE HANDLE LIST END */

/*
 * DBC_DONTLOCK and DBC_RECOVER are used during recovery and transaction
 * abort.  If a transaction is being aborted or recovered then DBC_RECOVER
 * will be set and locking and logging will be disabled on this cursor.  If
 * we are performing a compensating transaction (e.g. free page processing)
 * then DBC_DONTLOCK will be set to inhibit locking, but logging will still
 * be required.  DBC_DONTLOCK is also used if the whole database is locked.
 */
#define	DBC_ACTIVE		0x00001	/* Cursor in use. */
#define	DBC_BULK		0x00002	/* Bulk update cursor. */
#define	DBC_DONTLOCK		0x00004	/* Don't lock on this cursor. */
#define	DBC_DOWNREV		0x00008	/* Down rev replication master. */
#define	DBC_DUPLICATE		0x00010	/* Create a duplicate cursor. */
#define	DBC_FROM_DB_GET		0x00020 /* Called from the DB->get() method. */
#define	DBC_MULTIPLE		0x00040	/* Return Multiple data. */
#define	DBC_MULTIPLE_KEY	0x00080	/* Return Multiple keys and data. */
#define	DBC_OPD			0x00100	/* Cursor references off-page dups. */
#define	DBC_OWN_LID		0x00200	/* Free lock id on destroy. */
#define	DBC_PARTITIONED		0x00400	/* Cursor for a partitioned db. */
#define	DBC_READ_COMMITTED	0x00800	/* Cursor has degree 2 isolation. */
#define	DBC_READ_UNCOMMITTED	0x01000	/* Cursor has degree 1 isolation. */
#define	DBC_RECOVER		0x02000	/* Recovery cursor; don't log/lock. */
#define	DBC_RMW			0x04000	/* Acquire write flag in read op. */
#define	DBC_TRANSIENT		0x08000	/* Cursor is transient. */
#define	DBC_WAS_READ_COMMITTED	0x10000	/* Cursor holds a read committed lock. */
#define	DBC_WRITECURSOR		0x20000	/* Cursor may be used to write (CDB). */
#define	DBC_WRITER		0x40000	/* Cursor immediately writing (CDB). */
	u_int32_t flags;		/* Bitwise OR of the DBC_* values above. */
};
1892
/*
 * Key range statistics structure.
 *
 * NOTE(review): presumably filled in through the DB_KEY_RANGE argument of
 * the DB handle's key_range method, with each field a proportion of the
 * keys relative to the supplied key -- confirm against the implementation.
 */
struct __key_range {
	double less;
	double equal;
	double greater;
};
1899
/*
 * Btree/Recno statistics structure.
 *
 * Page and item counts are 32-bit; the "bytes free" totals are kept in
 * uintmax_t.
 */
struct __db_bt_stat {
	u_int32_t bt_magic;		/* Magic number. */
	u_int32_t bt_version;		/* Version number. */
	u_int32_t bt_metaflags;		/* Metadata flags. */
	u_int32_t bt_nkeys;		/* Number of unique keys. */
	u_int32_t bt_ndata;		/* Number of data items. */
	u_int32_t bt_pagecnt;		/* Page count. */
	u_int32_t bt_pagesize;		/* Page size. */
	u_int32_t bt_minkey;		/* Minkey value. */
	u_int32_t bt_re_len;		/* Fixed-length record length. */
	u_int32_t bt_re_pad;		/* Fixed-length record pad. */
	u_int32_t bt_levels;		/* Tree levels. */
	u_int32_t bt_int_pg;		/* Internal pages. */
	u_int32_t bt_leaf_pg;		/* Leaf pages. */
	u_int32_t bt_dup_pg;		/* Duplicate pages. */
	u_int32_t bt_over_pg;		/* Overflow pages. */
	u_int32_t bt_empty_pg;		/* Empty pages. */
	u_int32_t bt_free;		/* Pages on the free list. */
	uintmax_t bt_int_pgfree;	/* Bytes free in internal pages. */
	uintmax_t bt_leaf_pgfree;	/* Bytes free in leaf pages. */
	uintmax_t bt_dup_pgfree;	/* Bytes free in duplicate pages. */
	uintmax_t bt_over_pgfree;	/* Bytes free in overflow pages. */
};
1924
/*
 * Compaction parameters and results.
 *
 * The first group of fields is supplied by the caller, the second group
 * reports statistics back, and the last field is for internal use.
 */
struct __db_compact {
	/* Input Parameters. */
	u_int32_t	compact_fillpercent;	/* Desired fillfactor: 1-100 */
	db_timeout_t	compact_timeout;	/* Lock timeout. */
	u_int32_t	compact_pages;		/* Max pages to process. */
	/* Output Stats. */
	u_int32_t	compact_pages_free;	/* Number of pages freed. */
	u_int32_t	compact_pages_examine;	/* Number of pages examined. */
	u_int32_t	compact_levels;		/* Number of levels removed. */
	u_int32_t	compact_deadlock;	/* Number of deadlocks. */
	db_pgno_t	compact_pages_truncated; /* Pages truncated to OS. */
	/* Internal. */
	db_pgno_t	compact_truncate;	/* Page number for truncation */
};
1939
/*
 * Hash statistics structure.
 *
 * Page and item counts are 32-bit; the "bytes free" totals are kept in
 * uintmax_t and are interleaved with the counts they belong to.
 */
struct __db_h_stat {
	u_int32_t hash_magic;		/* Magic number. */
	u_int32_t hash_version;		/* Version number. */
	u_int32_t hash_metaflags;	/* Metadata flags. */
	u_int32_t hash_nkeys;		/* Number of unique keys. */
	u_int32_t hash_ndata;		/* Number of data items. */
	u_int32_t hash_pagecnt;		/* Page count. */
	u_int32_t hash_pagesize;	/* Page size. */
	u_int32_t hash_ffactor;		/* Fill factor specified at create. */
	u_int32_t hash_buckets;		/* Number of hash buckets. */
	u_int32_t hash_free;		/* Pages on the free list. */
	uintmax_t hash_bfree;		/* Bytes free on bucket pages. */
	u_int32_t hash_bigpages;	/* Number of big key/data pages. */
	uintmax_t hash_big_bfree;	/* Bytes free on big item pages. */
	u_int32_t hash_overflows;	/* Number of overflow pages. */
	uintmax_t hash_ovfl_free;	/* Bytes free on ovfl pages. */
	u_int32_t hash_dup;		/* Number of dup pages. */
	uintmax_t hash_dup_free;	/* Bytes free on duplicate pages. */
};
1960
/*
 * Queue statistics structure.
 *
 * Unlike the Btree and Hash statistics structures, every field here,
 * including the free-byte total, is 32-bit.
 */
struct __db_qam_stat {
	u_int32_t qs_magic;		/* Magic number. */
	u_int32_t qs_version;		/* Version number. */
	u_int32_t qs_metaflags;		/* Metadata flags. */
	u_int32_t qs_nkeys;		/* Number of unique keys. */
	u_int32_t qs_ndata;		/* Number of data items. */
	u_int32_t qs_pagesize;		/* Page size. */
	u_int32_t qs_extentsize;	/* Pages per extent. */
	u_int32_t qs_pages;		/* Data pages. */
	u_int32_t qs_re_len;		/* Fixed-length record length. */
	u_int32_t qs_re_pad;		/* Fixed-length record pad. */
	u_int32_t qs_pgfree;		/* Bytes free in data pages. */
	u_int32_t qs_first_recno;	/* First not deleted record. */
	u_int32_t qs_cur_recno;		/* Next available record number. */
};
1977
1978/*******************************************************
1979 * Environment.
1980 *******************************************************/
/*
 * NOTE(review): where this value is stored and checked is not visible in
 * this part of the header -- confirm before relying on it.
 */
#define	DB_REGION_MAGIC	0x120897	/* Environment magic number. */
1982
1983/*
1984 * Database environment structure.
1985 *
1986 * This is the public database environment handle.  The private environment
1987 * handle is the ENV structure.   The user owns this structure, the library
1988 * owns the ENV structure.  The reason there are two structures is because
1989 * the user's configuration outlives any particular DB_ENV->open call, and
1990 * separate structures allows us to easily discard internal information without
1991 * discarding the user's configuration.
1992 *
1993 * Fields in the DB_ENV structure should normally be set only by application
1994 * DB_ENV handle methods.
1995 */
1996struct __db_env {
1997	ENV *env;			/* Linked ENV structure */
1998
1999	/*
2000	 * The DB_ENV structure can be used concurrently, so field access is
2001	 * protected.
2002	 */
2003	db_mutex_t mtx_db_env;		/* DB_ENV structure mutex */
2004
2005					/* Error message callback */
2006	void (*db_errcall) __P((const DB_ENV *, const char *, const char *));
2007	FILE		*db_errfile;	/* Error message file stream */
2008	const char	*db_errpfx;	/* Error message prefix */
2009
2010					/* Other message callback */
2011	void (*db_msgcall) __P((const DB_ENV *, const char *));
2012	FILE		*db_msgfile;	/* Other message file stream */
2013
2014	/* Other application callback functions */
2015	int   (*app_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops));
2016	void  (*db_event_func) __P((DB_ENV *, u_int32_t, void *));
2017	void  (*db_feedback) __P((DB_ENV *, int, int));
2018	void  (*db_free) __P((void *));
2019	void  (*db_paniccall) __P((DB_ENV *, int));
2020	void *(*db_malloc) __P((size_t));
2021	void *(*db_realloc) __P((void *, size_t));
2022	int   (*is_alive) __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t));
2023	void  (*thread_id) __P((DB_ENV *, pid_t *, db_threadid_t *));
2024	char *(*thread_id_string) __P((DB_ENV *, pid_t, db_threadid_t, char *));
2025
2026	/* Application specified paths */
2027	char	*db_log_dir;		/* Database log file directory */
2028	char	*db_tmp_dir;		/* Database tmp file directory */
2029
2030	char    *db_create_dir;		/* Create directory for data files */
2031	char   **db_data_dir;		/* Database data file directories */
2032	int	 data_cnt;		/* Database data file slots */
2033	int	 data_next;		/* Next database data file slot */
2034
2035	char	*intermediate_dir_mode;	/* Intermediate directory perms */
2036
2037	long	 shm_key;		/* shmget key */
2038
2039	char	*passwd;		/* Cryptography support */
2040	size_t	 passwd_len;
2041
2042	void	*cl_handle;		/* RPC: remote client handle */
2043	u_int	 cl_id;			/* RPC: remote client env id */
2044
2045	/* Private handle references */
2046	void	*app_private;		/* Application-private handle */
2047	void	*api1_internal;		/* C++, Perl API private */
2048	void	*api2_internal;		/* Java API private */
2049
2050	u_int32_t	verbose;	/* DB_VERB_XXX flags */
2051
2052	/* Mutex configuration */
2053	u_int32_t	mutex_align;	/* Mutex alignment */
2054	u_int32_t	mutex_cnt;	/* Number of mutexes to configure */
2055	u_int32_t	mutex_inc;	/* Number of mutexes to add */
2056	u_int32_t	mutex_tas_spins;/* Test-and-set spin count */
2057
2058	/* Locking configuration */
2059	u_int8_t       *lk_conflicts;	/* Two dimensional conflict matrix */
2060	int		lk_modes;	/* Number of lock modes in table */
2061	u_int32_t	lk_detect;	/* Deadlock detect on all conflicts */
2062	u_int32_t	lk_max;	/* Maximum number of locks */
2063	u_int32_t	lk_max_lockers;/* Maximum number of lockers */
2064	u_int32_t	lk_max_objects;/* Maximum number of locked objects */
2065	u_int32_t	lk_partitions ;/* Number of object partitions */
2066	db_timeout_t	lk_timeout;	/* Lock timeout period */
2067
2068	/* Logging configuration */
2069	u_int32_t	lg_bsize;	/* Buffer size */
2070	int		lg_filemode;	/* Log file permission mode */
2071	u_int32_t	lg_regionmax;	/* Region size */
2072	u_int32_t	lg_size;	/* Log file size */
2073	u_int32_t	lg_flags;	/* Log configuration */
2074
2075	/* Memory pool configuration */
2076	u_int32_t	mp_gbytes;	/* Cache size: GB */
2077	u_int32_t	mp_bytes;	/* Cache size: bytes */
2078	u_int32_t	mp_max_gbytes;	/* Maximum cache size: GB */
2079	u_int32_t	mp_max_bytes;	/* Maximum cache size: bytes */
2080	size_t		mp_mmapsize;	/* Maximum file size for mmap */
2081	int		mp_maxopenfd;	/* Maximum open file descriptors */
2082	int		mp_maxwrite;	/* Maximum buffers to write */
2083	u_int		mp_ncache;	/* Initial number of cache regions */
2084	u_int32_t	mp_pagesize;	/* Average page size */
2085	u_int32_t	mp_tablesize;	/* Approximate hash table size */
2086					/* Sleep after writing max buffers */
2087	db_timeout_t	mp_maxwrite_sleep;
2088
2089	/* Transaction configuration */
2090	u_int32_t	tx_max;		/* Maximum number of transactions */
2091	time_t		tx_timestamp;	/* Recover to specific timestamp */
2092	db_timeout_t	tx_timeout;	/* Timeout for transactions */
2093
2094	/* Thread tracking configuration */
2095	u_int32_t	thr_max;	/* Thread count */
2096
2097	/*
2098	 * The following fields are not strictly user-owned, but they outlive
2099	 * the ENV structure, and so are stored here.
2100	 */
2101	DB_FH		*registry;	/* DB_REGISTER file handle */
2102	u_int32_t	registry_off;	/*
2103					 * Offset of our slot.  We can't use
2104					 * off_t because its size depends on
2105					 * build settings.
2106					 */
2107        db_timeout_t	envreg_timeout; /* DB_REGISTER wait timeout */ 
2108
2109#define	DB_ENV_AUTO_COMMIT	0x00000001 /* DB_AUTO_COMMIT */
2110#define	DB_ENV_CDB_ALLDB	0x00000002 /* CDB environment wide locking */
2111#define	DB_ENV_FAILCHK		0x00000004 /* Failchk is running */
2112#define	DB_ENV_DIRECT_DB	0x00000008 /* DB_DIRECT_DB set */
2113#define	DB_ENV_DSYNC_DB		0x00000010 /* DB_DSYNC_DB set */
2114#define	DB_ENV_MULTIVERSION	0x00000020 /* DB_MULTIVERSION set */
2115#define	DB_ENV_NOLOCKING	0x00000040 /* DB_NOLOCKING set */
2116#define	DB_ENV_NOMMAP		0x00000080 /* DB_NOMMAP set */
2117#define	DB_ENV_NOPANIC		0x00000100 /* Okay if panic set */
2118#define	DB_ENV_OVERWRITE	0x00000200 /* DB_OVERWRITE set */
2119#define	DB_ENV_REGION_INIT	0x00000400 /* DB_REGION_INIT set */
2120#define	DB_ENV_RPCCLIENT	0x00000800 /* DB_RPCCLIENT set */
2121#define	DB_ENV_RPCCLIENT_GIVEN	0x00001000 /* User-supplied RPC client struct */
2122#define	DB_ENV_TIME_NOTGRANTED	0x00002000 /* DB_TIME_NOTGRANTED set */
2123#define	DB_ENV_TXN_NOSYNC	0x00004000 /* DB_TXN_NOSYNC set */
2124#define	DB_ENV_TXN_NOWAIT	0x00008000 /* DB_TXN_NOWAIT set */
2125#define	DB_ENV_TXN_SNAPSHOT	0x00010000 /* DB_TXN_SNAPSHOT set */
2126#define	DB_ENV_TXN_WRITE_NOSYNC	0x00020000 /* DB_TXN_WRITE_NOSYNC set */
2127#define	DB_ENV_YIELDCPU		0x00040000 /* DB_YIELDCPU set */
2128	u_int32_t flags;
2129
2130	/* DB_ENV PUBLIC HANDLE LIST BEGIN */
2131	int  (*add_data_dir) __P((DB_ENV *, const char *));
2132	int  (*cdsgroup_begin) __P((DB_ENV *, DB_TXN **));
2133	int  (*close) __P((DB_ENV *, u_int32_t));
2134	int  (*dbremove) __P((DB_ENV *,
2135		DB_TXN *, const char *, const char *, u_int32_t));
2136	int  (*dbrename) __P((DB_ENV *,
2137		DB_TXN *, const char *, const char *, const char *, u_int32_t));
2138	void (*err) __P((const DB_ENV *, int, const char *, ...));
2139	void (*errx) __P((const DB_ENV *, const char *, ...));
2140	int  (*failchk) __P((DB_ENV *, u_int32_t));
2141	int  (*fileid_reset) __P((DB_ENV *, const char *, u_int32_t));
2142	int  (*get_alloc) __P((DB_ENV *, void *(**)(size_t),
2143		void *(**)(void *, size_t), void (**)(void *)));
2144	int  (*get_app_dispatch)
2145		__P((DB_ENV *, int (**)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
2146	int  (*get_cache_max) __P((DB_ENV *, u_int32_t *, u_int32_t *));
2147	int  (*get_cachesize) __P((DB_ENV *, u_int32_t *, u_int32_t *, int *));
2148	int  (*get_create_dir) __P((DB_ENV *, const char **));
2149	int  (*get_data_dirs) __P((DB_ENV *, const char ***));
2150	int  (*get_encrypt_flags) __P((DB_ENV *, u_int32_t *));
2151	void (*get_errcall) __P((DB_ENV *,
2152		void (**)(const DB_ENV *, const char *, const char *)));
2153	void (*get_errfile) __P((DB_ENV *, FILE **));
2154	void (*get_errpfx) __P((DB_ENV *, const char **));
2155	int  (*get_flags) __P((DB_ENV *, u_int32_t *));
2156	int  (*get_feedback) __P((DB_ENV *, void (**)(DB_ENV *, int, int)));
2157	int  (*get_home) __P((DB_ENV *, const char **));
2158	int  (*get_intermediate_dir_mode) __P((DB_ENV *, const char **));
2159	int  (*get_isalive) __P((DB_ENV *,
2160		int (**)(DB_ENV *, pid_t, db_threadid_t, u_int32_t)));
2161	int  (*get_lg_bsize) __P((DB_ENV *, u_int32_t *));
2162	int  (*get_lg_dir) __P((DB_ENV *, const char **));
2163	int  (*get_lg_filemode) __P((DB_ENV *, int *));
2164	int  (*get_lg_max) __P((DB_ENV *, u_int32_t *));
2165	int  (*get_lg_regionmax) __P((DB_ENV *, u_int32_t *));
2166	int  (*get_lk_conflicts) __P((DB_ENV *, const u_int8_t **, int *));
2167	int  (*get_lk_detect) __P((DB_ENV *, u_int32_t *));
2168	int  (*get_lk_max_lockers) __P((DB_ENV *, u_int32_t *));
2169	int  (*get_lk_max_locks) __P((DB_ENV *, u_int32_t *));
2170	int  (*get_lk_max_objects) __P((DB_ENV *, u_int32_t *));
2171	int  (*get_lk_partitions) __P((DB_ENV *, u_int32_t *));
2172	int  (*get_mp_max_openfd) __P((DB_ENV *, int *));
2173	int  (*get_mp_max_write) __P((DB_ENV *, int *, db_timeout_t *));
2174	int  (*get_mp_mmapsize) __P((DB_ENV *, size_t *));
2175	int  (*get_mp_pagesize) __P((DB_ENV *, u_int32_t *));
2176	int  (*get_mp_tablesize) __P((DB_ENV *, u_int32_t *));
2177	void (*get_msgcall)
2178		__P((DB_ENV *, void (**)(const DB_ENV *, const char *)));
2179	void (*get_msgfile) __P((DB_ENV *, FILE **));
2180	int  (*get_open_flags) __P((DB_ENV *, u_int32_t *));
2181	int  (*get_shm_key) __P((DB_ENV *, long *));
2182	int  (*get_thread_count) __P((DB_ENV *, u_int32_t *));
2183	int  (*get_thread_id_fn)
2184		__P((DB_ENV *, void (**)(DB_ENV *, pid_t *, db_threadid_t *)));
2185	int  (*get_thread_id_string_fn) __P((DB_ENV *,
2186		char *(**)(DB_ENV *, pid_t, db_threadid_t, char *)));
2187	int  (*get_timeout) __P((DB_ENV *, db_timeout_t *, u_int32_t));
2188	int  (*get_tmp_dir) __P((DB_ENV *, const char **));
2189	int  (*get_tx_max) __P((DB_ENV *, u_int32_t *));
2190	int  (*get_tx_timestamp) __P((DB_ENV *, time_t *));
2191	int  (*get_verbose) __P((DB_ENV *, u_int32_t, int *));
2192	int  (*is_bigendian) __P((void));
2193	int  (*lock_detect) __P((DB_ENV *, u_int32_t, u_int32_t, int *));
2194	int  (*lock_get) __P((DB_ENV *,
2195		u_int32_t, u_int32_t, DBT *, db_lockmode_t, DB_LOCK *));
2196	int  (*lock_id) __P((DB_ENV *, u_int32_t *));
2197	int  (*lock_id_free) __P((DB_ENV *, u_int32_t));
2198	int  (*lock_put) __P((DB_ENV *, DB_LOCK *));
2199	int  (*lock_stat) __P((DB_ENV *, DB_LOCK_STAT **, u_int32_t));
2200	int  (*lock_stat_print) __P((DB_ENV *, u_int32_t));
2201	int  (*lock_vec) __P((DB_ENV *,
2202		u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **));
2203	int  (*log_archive) __P((DB_ENV *, char **[], u_int32_t));
2204	int  (*log_cursor) __P((DB_ENV *, DB_LOGC **, u_int32_t));
2205	int  (*log_file) __P((DB_ENV *, const DB_LSN *, char *, size_t));
2206	int  (*log_flush) __P((DB_ENV *, const DB_LSN *));
2207	int  (*log_get_config) __P((DB_ENV *, u_int32_t, int *));
2208	int  (*log_printf) __P((DB_ENV *, DB_TXN *, const char *, ...));
2209	int  (*log_put) __P((DB_ENV *, DB_LSN *, const DBT *, u_int32_t));
2210	int  (*log_set_config) __P((DB_ENV *, u_int32_t, int));
2211	int  (*log_stat) __P((DB_ENV *, DB_LOG_STAT **, u_int32_t));
2212	int  (*log_stat_print) __P((DB_ENV *, u_int32_t));
2213	int  (*lsn_reset) __P((DB_ENV *, const char *, u_int32_t));
2214	int  (*memp_fcreate) __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t));
2215	int  (*memp_register) __P((DB_ENV *, int, int (*)(DB_ENV *, db_pgno_t,
2216		void *, DBT *), int (*)(DB_ENV *, db_pgno_t, void *, DBT *)));
2217	int  (*memp_stat) __P((DB_ENV *,
2218		DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, u_int32_t));
2219	int  (*memp_stat_print) __P((DB_ENV *, u_int32_t));
2220	int  (*memp_sync) __P((DB_ENV *, DB_LSN *));
2221	int  (*memp_trickle) __P((DB_ENV *, int, int *));
2222	int  (*mutex_alloc) __P((DB_ENV *, u_int32_t, db_mutex_t *));
2223	int  (*mutex_free) __P((DB_ENV *, db_mutex_t));
2224	int  (*mutex_get_align) __P((DB_ENV *, u_int32_t *));
2225	int  (*mutex_get_increment) __P((DB_ENV *, u_int32_t *));
2226	int  (*mutex_get_max) __P((DB_ENV *, u_int32_t *));
2227	int  (*mutex_get_tas_spins) __P((DB_ENV *, u_int32_t *));
2228	int  (*mutex_lock) __P((DB_ENV *, db_mutex_t));
2229	int  (*mutex_set_align) __P((DB_ENV *, u_int32_t));
2230	int  (*mutex_set_increment) __P((DB_ENV *, u_int32_t));
2231	int  (*mutex_set_max) __P((DB_ENV *, u_int32_t));
2232	int  (*mutex_set_tas_spins) __P((DB_ENV *, u_int32_t));
2233	int  (*mutex_stat) __P((DB_ENV *, DB_MUTEX_STAT **, u_int32_t));
2234	int  (*mutex_stat_print) __P((DB_ENV *, u_int32_t));
2235	int  (*mutex_unlock) __P((DB_ENV *, db_mutex_t));
2236	int  (*open) __P((DB_ENV *, const char *, u_int32_t, int));
2237	int  (*remove) __P((DB_ENV *, const char *, u_int32_t));
2238	int  (*rep_elect) __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t));
2239	int  (*rep_flush) __P((DB_ENV *));
2240	int  (*rep_get_clockskew) __P((DB_ENV *, u_int32_t *, u_int32_t *));
2241	int  (*rep_get_config) __P((DB_ENV *, u_int32_t, int *));
2242	int  (*rep_get_limit) __P((DB_ENV *, u_int32_t *, u_int32_t *));
2243	int  (*rep_get_nsites) __P((DB_ENV *, u_int32_t *));
2244	int  (*rep_get_priority) __P((DB_ENV *, u_int32_t *));
2245	int  (*rep_get_request) __P((DB_ENV *, u_int32_t *, u_int32_t *));
2246	int  (*rep_get_timeout) __P((DB_ENV *, int, u_int32_t *));
2247	int  (*rep_process_message)
2248		__P((DB_ENV *, DBT *, DBT *, int, DB_LSN *));
2249	int  (*rep_set_clockskew) __P((DB_ENV *, u_int32_t, u_int32_t));
2250	int  (*rep_set_config) __P((DB_ENV *, u_int32_t, int));
2251	int  (*rep_set_limit) __P((DB_ENV *, u_int32_t, u_int32_t));
2252	int  (*rep_set_nsites) __P((DB_ENV *, u_int32_t));
2253	int  (*rep_set_priority) __P((DB_ENV *, u_int32_t));
2254	int  (*rep_set_request) __P((DB_ENV *, u_int32_t, u_int32_t));
2255	int  (*rep_set_timeout) __P((DB_ENV *, int, db_timeout_t));
2256	int  (*rep_set_transport) __P((DB_ENV *, int, int (*)(DB_ENV *,
2257		const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)));
2258	int  (*rep_start) __P((DB_ENV *, DBT *, u_int32_t));
2259	int  (*rep_stat) __P((DB_ENV *, DB_REP_STAT **, u_int32_t));
2260	int  (*rep_stat_print) __P((DB_ENV *, u_int32_t));
2261	int  (*rep_sync) __P((DB_ENV *, u_int32_t));
2262	int  (*repmgr_add_remote_site)
2263		__P((DB_ENV *, const char *, u_int, int *, u_int32_t));
2264	int  (*repmgr_get_ack_policy) __P((DB_ENV *, int *));
2265	int  (*repmgr_set_ack_policy) __P((DB_ENV *, int));
2266	int  (*repmgr_set_local_site)
2267		__P((DB_ENV *, const char *, u_int, u_int32_t));
2268	int  (*repmgr_site_list)
2269		__P((DB_ENV *, u_int *, DB_REPMGR_SITE **));
2270	int  (*repmgr_start) __P((DB_ENV *, int, u_int32_t));
2271	int  (*repmgr_stat) __P((DB_ENV *, DB_REPMGR_STAT **, u_int32_t));
2272	int  (*repmgr_stat_print) __P((DB_ENV *, u_int32_t));
2273	int  (*set_alloc) __P((DB_ENV *, void *(*)(size_t),
2274		void *(*)(void *, size_t), void (*)(void *)));
2275	int  (*set_app_dispatch)
2276		__P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops)));
2277	int  (*set_cache_max) __P((DB_ENV *, u_int32_t, u_int32_t));
2278	int  (*set_cachesize) __P((DB_ENV *, u_int32_t, u_int32_t, int));
2279	int  (*set_create_dir) __P((DB_ENV *, const char *));
2280	int  (*set_data_dir) __P((DB_ENV *, const char *));
2281	int  (*set_encrypt) __P((DB_ENV *, const char *, u_int32_t));
2282	void (*set_errcall) __P((DB_ENV *,
2283		void (*)(const DB_ENV *, const char *, const char *)));
2284	void (*set_errfile) __P((DB_ENV *, FILE *));
2285	void (*set_errpfx) __P((DB_ENV *, const char *));
2286	int  (*set_event_notify)
2287		__P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *)));
2288	int  (*set_feedback) __P((DB_ENV *, void (*)(DB_ENV *, int, int)));
2289	int  (*set_flags) __P((DB_ENV *, u_int32_t, int));
2290	int  (*set_intermediate_dir_mode) __P((DB_ENV *, const char *));
2291	int  (*set_isalive) __P((DB_ENV *,
2292		int (*)(DB_ENV *, pid_t, db_threadid_t, u_int32_t)));
2293	int  (*set_lg_bsize) __P((DB_ENV *, u_int32_t));
2294	int  (*set_lg_dir) __P((DB_ENV *, const char *));
2295	int  (*set_lg_filemode) __P((DB_ENV *, int));
2296	int  (*set_lg_max) __P((DB_ENV *, u_int32_t));
2297	int  (*set_lg_regionmax) __P((DB_ENV *, u_int32_t));
2298	int  (*set_lk_conflicts) __P((DB_ENV *, u_int8_t *, int));
2299	int  (*set_lk_detect) __P((DB_ENV *, u_int32_t));
2300	int  (*set_lk_max_lockers) __P((DB_ENV *, u_int32_t));
2301	int  (*set_lk_max_locks) __P((DB_ENV *, u_int32_t));
2302	int  (*set_lk_max_objects) __P((DB_ENV *, u_int32_t));
2303	int  (*set_lk_partitions) __P((DB_ENV *, u_int32_t));
2304	int  (*set_mp_max_openfd) __P((DB_ENV *, int));
2305	int  (*set_mp_max_write) __P((DB_ENV *, int, db_timeout_t));
2306	int  (*set_mp_mmapsize) __P((DB_ENV *, size_t));
2307	int  (*set_mp_pagesize) __P((DB_ENV *, u_int32_t));
2308	int  (*set_mp_tablesize) __P((DB_ENV *, u_int32_t));
2309	void (*set_msgcall)
2310		__P((DB_ENV *, void (*)(const DB_ENV *, const char *)));
2311	void (*set_msgfile) __P((DB_ENV *, FILE *));
2312	int  (*set_paniccall) __P((DB_ENV *, void (*)(DB_ENV *, int)));
2313	int  (*set_rpc_server)
2314		__P((DB_ENV *, void *, const char *, long, long, u_int32_t));
2315	int  (*set_shm_key) __P((DB_ENV *, long));
2316	int  (*set_thread_count) __P((DB_ENV *, u_int32_t));
2317	int  (*set_thread_id)
2318		__P((DB_ENV *, void (*)(DB_ENV *, pid_t *, db_threadid_t *)));
2319	int  (*set_thread_id_string) __P((DB_ENV *,
2320		char *(*)(DB_ENV *, pid_t, db_threadid_t, char *)));
2321	int  (*set_timeout) __P((DB_ENV *, db_timeout_t, u_int32_t));
2322	int  (*set_tmp_dir) __P((DB_ENV *, const char *));
2323	int  (*set_tx_max) __P((DB_ENV *, u_int32_t));
2324	int  (*set_tx_timestamp) __P((DB_ENV *, time_t *));
2325	int  (*set_verbose) __P((DB_ENV *, u_int32_t, int));
2326	int  (*stat_print) __P((DB_ENV *, u_int32_t));
2327	int  (*txn_begin) __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t));
2328	int  (*txn_checkpoint) __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t));
2329	int  (*txn_recover) __P((DB_ENV *,
2330		DB_PREPLIST *, u_int32_t, u_int32_t *, u_int32_t));
2331	int  (*txn_stat) __P((DB_ENV *, DB_TXN_STAT **, u_int32_t));
2332	int  (*txn_stat_print) __P((DB_ENV *, u_int32_t));
2333	/* DB_ENV PUBLIC HANDLE LIST END */
2334
2335	/* DB_ENV PRIVATE HANDLE LIST BEGIN */
2336	int  (*prdbt) __P((DBT *,
2337		int, const char *, void *, int (*)(void *, const void *), int));
2338	/* DB_ENV PRIVATE HANDLE LIST END */
2339};
2340
2341/*
2342 * Dispatch structure for recovery and print routines.  Internal routines
2343 * take an ENV plus a trailing void *, external routines take a DB_ENV, so
2344 * each kind gets its own table of function pointers and its own size field.
2345 */
2346struct __db_distab {
2347	int   (**int_dispatch) __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
2348	size_t	int_size;	/* Size for int_dispatch; units TBC (entries?). */
2349	int   (**ext_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops));
2350	size_t	ext_size;	/* Size for ext_dispatch; units TBC (entries?). */
2351};
2352
2353#ifndef DB_DBM_HSEARCH		/* An includer's own definition wins. */
2354#define	DB_DBM_HSEARCH	0		/* No historic interfaces by default. */
2355#endif				/* !DB_DBM_HSEARCH */
2356#if DB_DBM_HSEARCH != 0		/* Opt-in; closed by the last #endif below. */
2357/*******************************************************
2358 * Dbm/Ndbm historic interfaces.
2359 *******************************************************/
2360typedef struct __db DBM;	/* The ndbm handle type is a struct __db. */
2361
2362#define	DBM_INSERT	0		/* Flags to dbm_store(). */
2363#define	DBM_REPLACE	1		/* (the other dbm_store() flag) */
2364
2365/*
2366 * The DB support for ndbm(3) always appends this suffix to the
2367 * file name to avoid overwriting the user's original database.
2368 */
2369#define	DBM_SUFFIX	".db"
2370
2371#if defined(_XPG4_2)		/* With _XPG4_2, dsize is a size_t ... */
2372typedef struct {
2373	char *dptr;
2374	size_t dsize;
2375} datum;
2376#else				/* ... otherwise dsize is a plain int. */
2377typedef struct {
2378	char *dptr;
2379	int dsize;
2380} datum;			/* Variants differ only in dsize's type. */
2381#endif				/* _XPG4_2 */
2382
2383/*
2384 * Translate NDBM calls into DB calls so that DB doesn't step on the
2385 * application's name space: dbm_XXX() maps to __db_ndbm_XXX plus a suffix.
2386 */
2387#define	dbm_clearerr(a)		__db_ndbm_clearerr@DB_VERSION_UNIQUE_NAME@(a)
2388#define	dbm_close(a)		__db_ndbm_close@DB_VERSION_UNIQUE_NAME@(a)
2389#define	dbm_delete(a, b)	__db_ndbm_delete@DB_VERSION_UNIQUE_NAME@(a, b)
2390#define	dbm_dirfno(a)		__db_ndbm_dirfno@DB_VERSION_UNIQUE_NAME@(a)
2391#define	dbm_error(a)		__db_ndbm_error@DB_VERSION_UNIQUE_NAME@(a)
2392#define	dbm_fetch(a, b)		__db_ndbm_fetch@DB_VERSION_UNIQUE_NAME@(a, b)
2393#define	dbm_firstkey(a)		__db_ndbm_firstkey@DB_VERSION_UNIQUE_NAME@(a)
2394#define	dbm_nextkey(a)		__db_ndbm_nextkey@DB_VERSION_UNIQUE_NAME@(a)
2395#define	dbm_open(a, b, c)	__db_ndbm_open@DB_VERSION_UNIQUE_NAME@(a, b, c)
2396#define	dbm_pagfno(a)		__db_ndbm_pagfno@DB_VERSION_UNIQUE_NAME@(a)
2397#define	dbm_rdonly(a)		__db_ndbm_rdonly@DB_VERSION_UNIQUE_NAME@(a)
2398#define	dbm_store(a, b, c, d) \
2399	__db_ndbm_store@DB_VERSION_UNIQUE_NAME@(a, b, c, d)
2400
2401/*
2402 * Translate DBM calls into DB calls so that DB doesn't step on the
2403 * application's name space.
2404 *
2405 * The global variables dbrdonly, dirf and pagf were not retained when 4BSD
2406 * replaced the dbm interface with ndbm, and are not supported here.
2407 */
2408#define	dbminit(a)	__db_dbm_init@DB_VERSION_UNIQUE_NAME@(a)
2409#define	dbmclose	__db_dbm_close@DB_VERSION_UNIQUE_NAME@	/* Object-like. */
2410#if !defined(__cplusplus)	/* "delete" is a keyword in C++. */
2411#define	delete(a)	__db_dbm_delete@DB_VERSION_UNIQUE_NAME@(a)
2412#endif				/* !__cplusplus */
2413#define	fetch(a)	__db_dbm_fetch@DB_VERSION_UNIQUE_NAME@(a)
2414#define	firstkey	__db_dbm_firstkey@DB_VERSION_UNIQUE_NAME@	/* Object-like. */
2415#define	nextkey(a)	__db_dbm_nextkey@DB_VERSION_UNIQUE_NAME@(a)
2416#define	store(a, b)	__db_dbm_store@DB_VERSION_UNIQUE_NAME@(a, b)
2417
2418/*******************************************************
2419 * Hsearch historic interface.
2420 *******************************************************/
2421typedef enum {
2422	FIND, ENTER		/* Implicit values: FIND == 0, ENTER == 1. */
2423} ACTION;			/* Presumably hsearch()'s 2nd argument -- confirm. */
2424
2425typedef struct entry {
2426	char *key;
2427	char *data;
2428} ENTRY;			/* Two char pointers; ownership is not stated here. */
2429
2430#define	hcreate(a)	__db_hcreate@DB_VERSION_UNIQUE_NAME@(a)
2431#define	hdestroy	__db_hdestroy@DB_VERSION_UNIQUE_NAME@	/* Object-like. */
2432#define	hsearch(a, b)	__db_hsearch@DB_VERSION_UNIQUE_NAME@(a, b)
2433
2434#endif /* DB_DBM_HSEARCH != 0: end of the historic interfaces. */
2435
2436#if defined(__cplusplus)	/* Close the extern "C" block opened above. */
2437}				/* extern "C" */
2438#endif				/* __cplusplus */
2439
2440@platform_footer@
2441#endif /* !_DB_H_ */
2442