1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: db_iface.c,v 12.85 2008/03/24 19:34:50 bschmeck Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/db_page.h"
13#include "dbinc/btree.h"
14#include "dbinc/hash.h"
15#ifndef HAVE_QUEUE
16#include "dbinc/qam.h"			/* For __db_no_queue_am(). */
17#endif
18#include "dbinc/lock.h"
19#include "dbinc/log.h"
20#include "dbinc/mp.h"
21#include "dbinc/txn.h"
22
23static int __db_associate_arg __P((DB *, DB *,
24	       int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
25static int __dbc_del_arg __P((DBC *, u_int32_t));
26static int __dbc_get_arg __P((DBC *, DBT *, DBT *, u_int32_t));
27static int __dbc_pget_arg __P((DBC *, DBT *, u_int32_t));
28static int __dbc_put_arg __P((DBC *, DBT *, DBT *, u_int32_t));
29static int __db_curinval __P((const ENV *));
30static int __db_cursor_arg __P((DB *, u_int32_t));
31static int __db_del_arg __P((DB *, DBT *, u_int32_t));
32static int __db_get_arg __P((const DB *, DBT *, DBT *, u_int32_t));
33static int __db_join_arg __P((DB *, DBC **, u_int32_t));
34static int __db_open_arg __P((DB *,
35	       DB_TXN *, const char *, const char *, DBTYPE, u_int32_t));
36static int __db_pget_arg __P((DB *, DBT *, u_int32_t));
37static int __db_put_arg __P((DB *, DBT *, DBT *, u_int32_t));
38static int __dbt_ferr __P((const DB *, const char *, const DBT *, int));
39static int __db_associate_foreign_arg __P((DB *, DB *,
40		int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
41		u_int32_t));
42
43/*
44 * These functions implement the Berkeley DB API.  They are organized in a
45 * layered fashion.  The interface functions (XXX_pp) perform all generic
46 * error checks (for example, PANIC'd region, replication state change
47 * in progress, inconsistent transaction usage), call function-specific
48 * check routines (_arg) to check for proper flag usage, etc., do pre-amble
49 * processing (incrementing handle counts, handling local transactions),
50 * call the function and then do post-amble processing (local transactions,
51 * decrement handle counts).
52 *
53 * The basic structure is:
54 *	Check for simple/generic errors (PANIC'd region)
55 *	Check if replication is changing state (increment handle count).
56 *	Call function-specific argument checking routine
57 *	Create internal transaction if necessary
58 *	Call underlying worker function
59 *	Commit/abort internal transaction if necessary
60 *	Decrement handle count
61 */
62
63/*
64 * __db_associate_pp --
65 *	DB->associate pre/post processing.
66 *
67 * PUBLIC: int __db_associate_pp __P((DB *, DB_TXN *, DB *,
68 * PUBLIC:     int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
69 */
70int
71__db_associate_pp(dbp, txn, sdbp, callback, flags)
72	DB *dbp, *sdbp;
73	DB_TXN *txn;
74	int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
75	u_int32_t flags;
76{
77	DBC *sdbc;
78	DB_THREAD_INFO *ip;
79	ENV *env;
80	int handle_check, ret, t_ret, txn_local;
81
82	env = dbp->env;
83	txn_local = 0;
84
85	STRIP_AUTO_COMMIT(flags);
86
87	ENV_ENTER(env, ip);
88
89	/* Check for replication block. */
90	handle_check = IS_ENV_REPLICATED(env);
91	if (handle_check &&
92	    (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
93		handle_check = 0;
94		goto err;
95	}
96
97	/*
98	 * Secondary cursors may have the primary's lock file ID, so we need
99	 * to make sure that no older cursors are lying around when we make
100	 * the transition.
101	 */
102	if (TAILQ_FIRST(&sdbp->active_queue) != NULL ||
103	    TAILQ_FIRST(&sdbp->join_queue) != NULL) {
104		__db_errx(env,
105    "Databases may not become secondary indices while cursors are open");
106		ret = EINVAL;
107		goto err;
108	}
109
110	if ((ret = __db_associate_arg(dbp, sdbp, callback, flags)) != 0)
111		goto err;
112
113	/*
114	 * Create a local transaction as necessary, check for consistent
115	 * transaction usage, and, if we have no transaction but do have
116	 * locking on, acquire a locker id for the handle lock acquisition.
117	 */
118	if (IS_DB_AUTO_COMMIT(dbp, txn)) {
119		if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
120			goto err;
121		txn_local = 1;
122	}
123
124	/* Check for consistent transaction usage. */
125	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
126		goto err;
127
128	while ((sdbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
129		if ((ret = __dbc_destroy(sdbc)) != 0)
130			goto err;
131
132	ret = __db_associate(dbp, ip, txn, sdbp, callback, flags);
133
134err:	if (txn_local &&
135	    (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
136		ret = t_ret;
137
138	/* Release replication block. */
139	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
140		ret = t_ret;
141	ENV_LEAVE(env, ip);
142	return (ret);
143}
144
145/*
146 * __db_associate_arg --
147 *	Check DB->associate arguments.
148 */
149static int
150__db_associate_arg(dbp, sdbp, callback, flags)
151	DB *dbp, *sdbp;
152	int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
153	u_int32_t flags;
154{
155	ENV *env;
156	int ret;
157
158	env = dbp->env;
159
160	if (F_ISSET(sdbp, DB_AM_SECONDARY)) {
161		__db_errx(env,
162		    "Secondary index handles may not be re-associated");
163		return (EINVAL);
164	}
165	if (F_ISSET(dbp, DB_AM_SECONDARY)) {
166		__db_errx(env,
167		    "Secondary indices may not be used as primary databases");
168		return (EINVAL);
169	}
170	if (F_ISSET(dbp, DB_AM_DUP)) {
171		__db_errx(env,
172		    "Primary databases may not be configured with duplicates");
173		return (EINVAL);
174	}
175	if (F_ISSET(dbp, DB_AM_RENUMBER)) {
176		__db_errx(env,
177	    "Renumbering recno databases may not be used as primary databases");
178		return (EINVAL);
179	}
180
181	/*
182	 * It's OK for the primary and secondary to not share an environment IFF
183	 * the environments are local to the DB handle.  (Specifically, cursor
184	 * adjustment will work correctly in this case.)  The environment being
185	 * local implies the environment is not configured for either locking or
186	 * transactions, as neither of those could work correctly.
187	 */
188	if (dbp->env != sdbp->env &&
189	    (!F_ISSET(dbp->env, ENV_DBLOCAL) ||
190	     !F_ISSET(sdbp->env, ENV_DBLOCAL))) {
191		__db_errx(env,
192	    "The primary and secondary must be opened in the same environment");
193		return (EINVAL);
194	}
195	if ((DB_IS_THREADED(dbp) && !DB_IS_THREADED(sdbp)) ||
196	    (!DB_IS_THREADED(dbp) && DB_IS_THREADED(sdbp))) {
197		__db_errx(env,
198	    "The DB_THREAD setting must be the same for primary and secondary");
199		return (EINVAL);
200	}
201	if (callback == NULL &&
202	    (!F_ISSET(dbp, DB_AM_RDONLY) || !F_ISSET(sdbp, DB_AM_RDONLY))) {
203		__db_errx(env,
204    "Callback function may be NULL only when database handles are read-only");
205		return (EINVAL);
206	}
207
208	if ((ret = __db_fchk(env, "DB->associate", flags, DB_CREATE |
209	    DB_IMMUTABLE_KEY)) != 0)
210		return (ret);
211
212	return (0);
213}
214
215/*
216 * __db_close_pp --
217 *	DB->close pre/post processing.
218 *
219 * PUBLIC: int __db_close_pp __P((DB *, u_int32_t));
220 */
221int
222__db_close_pp(dbp, flags)
223	DB *dbp;
224	u_int32_t flags;
225{
226	DB_THREAD_INFO *ip;
227	ENV *env;
228	int handle_check, ret, t_ret;
229
230	env = dbp->env;
231	ret = 0;
232
233	/*
234	 * Close a DB handle -- as a handle destructor, we can't fail.
235	 *
236	 * !!!
237	 * The actual argument checking is simple, do it inline, outside of
238	 * the replication block.
239	 */
240	if (flags != 0 && flags != DB_NOSYNC)
241		ret = __db_ferr(env, "DB->close", 0);
242
243	ENV_ENTER(env, ip);
244
245	/* Check for replication block. */
246	handle_check = IS_ENV_REPLICATED(env);
247	if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) {
248		handle_check = 0;
249		if (ret == 0)
250			ret = t_ret;
251	}
252
253	if ((t_ret = __db_close(dbp, NULL, flags)) != 0 && ret == 0)
254		ret = t_ret;
255
256	/* Release replication block. */
257	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
258		ret = t_ret;
259
260	ENV_LEAVE(env, ip);
261	return (ret);
262}
263
264/*
265 * __db_cursor_pp --
266 *	DB->cursor pre/post processing.
267 *
268 * PUBLIC: int __db_cursor_pp __P((DB *, DB_TXN *, DBC **, u_int32_t));
269 */
270int
271__db_cursor_pp(dbp, txn, dbcp, flags)
272	DB *dbp;
273	DB_TXN *txn;
274	DBC **dbcp;
275	u_int32_t flags;
276{
277	DB_THREAD_INFO *ip;
278	ENV *env;
279	REGENV *renv;
280	int rep_blocked, ret;
281
282	env = dbp->env;
283
284	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");
285
286	ENV_ENTER(env, ip);
287
288	/* Check for replication block. */
289	rep_blocked = 0;
290	if (txn == NULL && IS_ENV_REPLICATED(env)) {
291		if ((ret = __op_rep_enter(env)) != 0)
292			goto err;
293		rep_blocked = 1;
294		renv = env->reginfo->primary;
295		if (dbp->timestamp != renv->rep_timestamp) {
296			__db_errx(env, "%s %s",
297		    "replication recovery unrolled committed transactions;",
298		    "open DB and DBcursor handles must be closed");
299			ret = DB_REP_HANDLE_DEAD;
300			goto err;
301		}
302	}
303	if ((ret = __db_cursor_arg(dbp, flags)) != 0)
304		goto err;
305
306	/*
307	 * Check for consistent transaction usage.  For now, assume this
308	 * cursor might be used for read operations only (in which case
309	 * it may not require a txn).  We'll check more stringently in
310	 * c_del and c_put.  (Note this means the read-op txn tests have
311	 * to be a subset of the write-op ones.)
312	 */
313	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
314		goto err;
315
316	ret = __db_cursor(dbp, ip, txn, dbcp, flags);
317
318err:	/* Release replication block on error. */
319	if (ret != 0 && rep_blocked)
320		(void)__op_rep_exit(env);
321
322	ENV_LEAVE(env, ip);
323	return (ret);
324}
325
326/*
327 * __db_cursor --
328 *	DB->cursor.
329 *
330 * PUBLIC: int __db_cursor __P((DB *,
331 * PUBLIC:      DB_THREAD_INFO *, DB_TXN *, DBC **, u_int32_t));
332 */
333int
334__db_cursor(dbp, ip, txn, dbcp, flags)
335	DB *dbp;
336	DB_THREAD_INFO *ip;
337	DB_TXN *txn;
338	DBC **dbcp;
339	u_int32_t flags;
340{
341	DBC *dbc;
342	ENV *env;
343	db_lockmode_t mode;
344	u_int32_t op;
345	int ret;
346
347	env = dbp->env;
348
349	if (MULTIVERSION(dbp) && txn == NULL && (LF_ISSET(DB_TXN_SNAPSHOT) ||
350	    F_ISSET(env->dbenv, DB_ENV_TXN_SNAPSHOT))) {
351		if ((ret =
352		    __txn_begin(env, ip, NULL, &txn, DB_TXN_SNAPSHOT)) != 0)
353			return (ret);
354		F_SET(txn, TXN_PRIVATE);
355	}
356
357	if ((ret = __db_cursor_int(dbp, ip,
358	    txn, dbp->type, PGNO_INVALID, 0, NULL, &dbc)) != 0)
359		return (ret);
360
361	/*
362	 * If this is CDB, do all the locking in the interface, which is
363	 * right here.
364	 */
365	if (CDB_LOCKING(env)) {
366		op = LF_ISSET(DB_OPFLAGS_MASK);
367		mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE :
368		    ((op == DB_WRITECURSOR || txn != NULL) ? DB_LOCK_IWRITE :
369		    DB_LOCK_READ);
370		if ((ret = __lock_get(env, dbc->locker, 0,
371		    &dbc->lock_dbt, mode, &dbc->mylock)) != 0)
372			goto err;
373		if (op == DB_WRITECURSOR)
374			F_SET(dbc, DBC_WRITECURSOR);
375		if (op == DB_WRITELOCK)
376			F_SET(dbc, DBC_WRITER);
377	}
378
379	if (LF_ISSET(DB_READ_UNCOMMITTED) ||
380	    (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED)))
381		F_SET(dbc, DBC_READ_UNCOMMITTED);
382
383	if (LF_ISSET(DB_READ_COMMITTED) ||
384	    (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED)))
385		F_SET(dbc, DBC_READ_COMMITTED);
386
387	*dbcp = dbc;
388	return (0);
389
390err:	(void)__dbc_close(dbc);
391	return (ret);
392}
393
394/*
395 * __db_cursor_arg --
396 *	Check DB->cursor arguments.
397 */
398static int
399__db_cursor_arg(dbp, flags)
400	DB *dbp;
401	u_int32_t flags;
402{
403	ENV *env;
404
405	env = dbp->env;
406
407	/*
408	 * DB_READ_COMMITTED and DB_READ_UNCOMMITTED require locking.
409	 */
410	if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED)) {
411		if (!LOCKING_ON(env))
412			return (__db_fnl(env, "DB->cursor"));
413	}
414
415	LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT);
416
417	/* Check for invalid function flags. */
418	switch (flags) {
419	case 0:
420		break;
421	case DB_WRITECURSOR:
422		if (DB_IS_READONLY(dbp))
423			return (__db_rdonly(env, "DB->cursor"));
424		if (!CDB_LOCKING(env))
425			return (__db_ferr(env, "DB->cursor", 0));
426		break;
427	case DB_WRITELOCK:
428		if (DB_IS_READONLY(dbp))
429			return (__db_rdonly(env, "DB->cursor"));
430		break;
431	default:
432		return (__db_ferr(env, "DB->cursor", 0));
433	}
434
435	return (0);
436}
437
438/*
439 * __db_del_pp --
440 *	DB->del pre/post processing.
441 *
442 * PUBLIC: int __db_del_pp __P((DB *, DB_TXN *, DBT *, u_int32_t));
443 */
444int
445__db_del_pp(dbp, txn, key, flags)
446	DB *dbp;
447	DB_TXN *txn;
448	DBT *key;
449	u_int32_t flags;
450{
451	DB_THREAD_INFO *ip;
452	ENV *env;
453	int handle_check, ret, t_ret, txn_local;
454
455	env = dbp->env;
456	txn_local = 0;
457
458	STRIP_AUTO_COMMIT(flags);
459	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del");
460
461#ifdef CONFIG_TEST
462	if (IS_REP_MASTER(env))
463		DB_TEST_WAIT(env, env->test_check);
464#endif
465	ENV_ENTER(env, ip);
466
467	/* Check for replication block. */
468	handle_check = IS_ENV_REPLICATED(env);
469	if (handle_check &&
470	     (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
471			handle_check = 0;
472			goto err;
473	}
474
475	if ((ret = __db_del_arg(dbp, key, flags)) != 0)
476		goto err;
477
478	/* Create local transaction as necessary. */
479	if (IS_DB_AUTO_COMMIT(dbp, txn)) {
480		if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
481			goto err;
482		txn_local = 1;
483	}
484
485	/* Check for consistent transaction usage. */
486	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
487		goto err;
488
489	ret = __db_del(dbp, ip, txn, key, flags);
490
491err:	if (txn_local &&
492	    (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
493		ret = t_ret;
494
495	/* Release replication block. */
496	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
497		ret = t_ret;
498	ENV_LEAVE(env, ip);
499	__dbt_userfree(env, key, NULL, NULL);
500	return (ret);
501}
502
503/*
504 * __db_del_arg --
505 *	Check DB->delete arguments.
506 */
507static int
508__db_del_arg(dbp, key, flags)
509	DB *dbp;
510	DBT *key;
511	u_int32_t flags;
512{
513	ENV *env;
514	int ret;
515
516	env = dbp->env;
517
518	/* Check for changes to a read-only tree. */
519	if (DB_IS_READONLY(dbp))
520		return (__db_rdonly(env, "DB->del"));
521
522	/* Check for invalid function flags. */
523	switch (flags) {
524	case 0:
525		if ((ret = __dbt_usercopy(env, key)) != 0)
526			return (ret);
527		break;
528	default:
529		return (__db_ferr(env, "DB->del", 0));
530	}
531
532	return (0);
533}
534
535/*
536 * __db_exists --
537 *	DB->exists implementation.
538 *
539 * PUBLIC: int __db_exists __P((DB *, DB_TXN *, DBT *, u_int32_t));
540 */
541int
542__db_exists(dbp, txn, key, flags)
543	DB *dbp;
544	DB_TXN *txn;
545	DBT *key;
546	u_int32_t flags;
547{
548	DBT data;
549	int ret;
550
551	/*
552	 * Most flag checking is done in the DB->get call, we only check for
553	 * specific incompatibilities here.  This saves making __get_arg
554	 * aware of the exist method's API constraints.
555	 */
556	if ((ret = __db_fchk(dbp->env, "DB->exists", flags,
557	    DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) != 0)
558		return (ret);
559
560	/*
561	 * Configure a data DBT that returns no bytes so there's no copy
562	 * of the data.
563	 */
564	memset(&data, 0, sizeof(data));
565	data.dlen = 0;
566	data.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;
567
568	return (dbp->get(dbp, txn, key, &data, flags));
569}
570
571/*
572 * db_fd_pp --
573 *	DB->fd pre/post processing.
574 *
575 * PUBLIC: int __db_fd_pp __P((DB *, int *));
576 */
577int
578__db_fd_pp(dbp, fdp)
579	DB *dbp;
580	int *fdp;
581{
582	DB_FH *fhp;
583	DB_THREAD_INFO *ip;
584	ENV *env;
585	int handle_check, ret, t_ret;
586
587	env = dbp->env;
588
589	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd");
590
591	ENV_ENTER(env, ip);
592
593	/* Check for replication block. */
594	handle_check = IS_ENV_REPLICATED(env);
595	if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0)
596		goto err;
597
598	/*
599	 * !!!
600	 * There's no argument checking to be done.
601	 *
602	 * !!!
603	 * The actual method call is simple, do it inline.
604	 *
605	 * XXX
606	 * Truly spectacular layering violation.
607	 */
608	if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) == 0) {
609		if (fhp == NULL) {
610			*fdp = -1;
611			__db_errx(env,
612			    "Database does not have a valid file handle");
613			ret = ENOENT;
614		} else
615			*fdp = fhp->fd;
616	}
617
618	/* Release replication block. */
619	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
620		ret = t_ret;
621
622err:	ENV_LEAVE(env, ip);
623	return (ret);
624}
625
626/*
627 * __db_get_pp --
628 *	DB->get pre/post processing.
629 *
630 * PUBLIC: int __db_get_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
631 */
632int
633__db_get_pp(dbp, txn, key, data, flags)
634	DB *dbp;
635	DB_TXN *txn;
636	DBT *key, *data;
637	u_int32_t flags;
638{
639	DB_THREAD_INFO *ip;
640	ENV *env;
641	u_int32_t mode;
642	int handle_check, ignore_lease, ret, t_ret, txn_local;
643
644	env = dbp->env;
645	mode = 0;
646	txn_local = 0;
647
648	STRIP_AUTO_COMMIT(flags);
649	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get");
650
651	ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
652	LF_CLR(DB_IGNORE_LEASE);
653
654	if ((ret = __db_get_arg(dbp, key, data, flags)) != 0)
655		return (ret);
656
657	ENV_ENTER(env, ip);
658
659	/* Check for replication block. */
660	handle_check = IS_ENV_REPLICATED(env);
661	if (handle_check &&
662	     (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
663			handle_check = 0;
664			goto err;
665	}
666
667	if (LF_ISSET(DB_READ_UNCOMMITTED))
668		mode = DB_READ_UNCOMMITTED;
669	else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME ||
670	    (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT) {
671		mode = DB_WRITELOCK;
672		if (IS_DB_AUTO_COMMIT(dbp, txn)) {
673			if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
674				goto err;
675			txn_local = 1;
676		}
677	}
678
679	/* Check for consistent transaction usage. */
680	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID,
681	    mode == DB_WRITELOCK || LF_ISSET(DB_RMW) ? 0 : 1)) != 0)
682		goto err;
683
684	ret = __db_get(dbp, ip, txn, key, data, flags);
685	/*
686	 * Check for master leases.
687	 */
688	if (ret == 0 &&
689	    IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
690		ret = __rep_lease_check(env, 1);
691
692err:	if (txn_local &&
693	    (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
694		ret = t_ret;
695
696	/* Release replication block. */
697	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
698		ret = t_ret;
699
700	ENV_LEAVE(env, ip);
701	__dbt_userfree(env, key, NULL, data);
702	return (ret);
703}
704
705/*
706 * __db_get --
707 *	DB->get.
708 *
709 * PUBLIC: int __db_get __P((DB *,
710 * PUBLIC:     DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t));
711 */
712int
713__db_get(dbp, ip, txn, key, data, flags)
714	DB *dbp;
715	DB_THREAD_INFO *ip;
716	DB_TXN *txn;
717	DBT *key, *data;
718	u_int32_t flags;
719{
720	DBC *dbc;
721	u_int32_t mode;
722	int ret, t_ret;
723
724	mode = 0;
725	if (LF_ISSET(DB_READ_UNCOMMITTED)) {
726		mode = DB_READ_UNCOMMITTED;
727		LF_CLR(DB_READ_UNCOMMITTED);
728	} else if (LF_ISSET(DB_READ_COMMITTED)) {
729		mode = DB_READ_COMMITTED;
730		LF_CLR(DB_READ_COMMITTED);
731	} else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME ||
732	    (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT)
733		mode = DB_WRITELOCK;
734
735	if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0)
736		return (ret);
737
738	DEBUG_LREAD(dbc, txn, "DB->get", key, NULL, flags);
739
740	/*
741	 * The DBC_TRANSIENT flag indicates that we're just doing a
742	 * single operation with this cursor, and that in case of
743	 * error we don't need to restore it to its old position--we're
744	 * going to close it right away.  Thus, we can perform the get
745	 * without duplicating the cursor, saving some cycles in this
746	 * common case.
747	 */
748	F_SET(dbc, DBC_TRANSIENT);
749
750	/*
751	 * SET_RET_MEM indicates that if key and/or data have no DBT
752	 * flags set and DB manages the returned-data memory, that memory
753	 * will belong to this handle, not to the underlying cursor.
754	 */
755	SET_RET_MEM(dbc, dbp);
756
757	if (LF_ISSET(~(DB_RMW | DB_MULTIPLE)) == 0)
758		LF_SET(DB_SET);
759
760	ret = __dbc_get(dbc, key, data, flags);
761
762	if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
763		ret = t_ret;
764
765	return (ret);
766}
767
768/*
769 * __db_get_arg --
770 *	DB->get argument checking, used by both DB->get and DB->pget.
771 */
772static int
773__db_get_arg(dbp, key, data, flags)
774	const DB *dbp;
775	DBT *key, *data;
776	u_int32_t flags;
777{
778	ENV *env;
779	int dirty, multi, ret;
780
781	env = dbp->env;
782
783	/*
784	 * Check for read-modify-write validity.  DB_RMW doesn't make sense
785	 * with CDB cursors since if you're going to write the cursor, you
786	 * had to create it with DB_WRITECURSOR.  Regardless, we check for
787	 * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it.
788	 * If this changes, confirm that DB does not itself set the DB_RMW
789	 * flag in a path where CDB may have been configured.
790	 */
791	dirty = 0;
792	if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) {
793		if (!LOCKING_ON(env))
794			return (__db_fnl(env, "DB->get"));
795		if ((ret = __db_fcchk(env, "DB->get",
796		    flags, DB_READ_UNCOMMITTED, DB_READ_COMMITTED)) != 0)
797			return (ret);
798		if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED))
799			dirty = 1;
800		LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
801	}
802
803	multi = 0;
804	if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
805		if (LF_ISSET(DB_MULTIPLE_KEY))
806			goto multi_err;
807		multi = LF_ISSET(DB_MULTIPLE) ? 1 : 0;
808		LF_CLR(DB_MULTIPLE);
809	}
810
811	/* Check for invalid function flags. */
812	switch (flags) {
813	case DB_GET_BOTH:
814		if ((ret = __dbt_usercopy(env, data)) != 0)
815			return (ret);
816		/* FALLTHROUGH */
817	case 0:
818		if ((ret = __dbt_usercopy(env, key)) != 0) {
819			__dbt_userfree(env, key, NULL, data);
820			return (ret);
821		}
822		break;
823	case DB_SET_RECNO:
824		if (!F_ISSET(dbp, DB_AM_RECNUM))
825			goto err;
826		if ((ret = __dbt_usercopy(env, key)) != 0)
827			return (ret);
828		break;
829	case DB_CONSUME:
830	case DB_CONSUME_WAIT:
831		if (dirty) {
832			__db_errx(env,
833		    "%s is not supported with DB_CONSUME or DB_CONSUME_WAIT",
834			     LF_ISSET(DB_READ_UNCOMMITTED) ?
835			     "DB_READ_UNCOMMITTED" : "DB_READ_COMMITTED");
836			return (EINVAL);
837		}
838		if (multi)
839multi_err:		return (__db_ferr(env, "DB->get", 1));
840		if (dbp->type == DB_QUEUE)
841			break;
842		/* FALLTHROUGH */
843	default:
844err:		return (__db_ferr(env, "DB->get", 0));
845	}
846
847	/*
848	 * Check for invalid key/data flags.
849	 */
850	if ((ret =
851	    __dbt_ferr(dbp, "key", key, DB_RETURNS_A_KEY(dbp, flags))) != 0)
852		return (ret);
853	if ((ret = __dbt_ferr(dbp, "data", data, 1)) != 0)
854		return (ret);
855
856	if (multi) {
857		if (!F_ISSET(data, DB_DBT_USERMEM)) {
858			__db_errx(env,
859			    "DB_MULTIPLE requires DB_DBT_USERMEM be set");
860			return (EINVAL);
861		}
862		if (F_ISSET(key, DB_DBT_PARTIAL) ||
863		    F_ISSET(data, DB_DBT_PARTIAL)) {
864			__db_errx(env,
865			    "DB_MULTIPLE does not support DB_DBT_PARTIAL");
866			return (EINVAL);
867		}
868		if (data->ulen < 1024 ||
869		    data->ulen < dbp->pgsize || data->ulen % 1024 != 0) {
870			__db_errx(env, "%s%s",
871			    "DB_MULTIPLE buffers must be ",
872			    "aligned, at least page size and multiples of 1KB");
873			return (EINVAL);
874		}
875	}
876
877	return (0);
878}
879
880/*
881 * __db_join_pp --
882 *	DB->join pre/post processing.
883 *
884 * PUBLIC: int __db_join_pp __P((DB *, DBC **, DBC **, u_int32_t));
885 */
886int
887__db_join_pp(primary, curslist, dbcp, flags)
888	DB *primary;
889	DBC **curslist, **dbcp;
890	u_int32_t flags;
891{
892	DB_THREAD_INFO *ip;
893	ENV *env;
894	int handle_check, ret, t_ret;
895
896	env = primary->env;
897
898	ENV_ENTER(env, ip);
899
900	/* Check for replication block. */
901	handle_check = IS_ENV_REPLICATED(env);
902	if (handle_check && (ret =
903	    __db_rep_enter(primary, 1, 0, curslist[0]->txn != NULL)) != 0) {
904		handle_check = 0;
905		goto err;
906	}
907
908	if ((ret = __db_join_arg(primary, curslist, flags)) == 0)
909		ret = __db_join(primary, curslist, dbcp, flags);
910
911	/* Release replication block. */
912	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
913		ret = t_ret;
914
915err:	ENV_LEAVE(env, ip);
916	return (ret);
917}
918
919/*
920 * __db_join_arg --
921 *	Check DB->join arguments.
922 */
923static int
924__db_join_arg(primary, curslist, flags)
925	DB *primary;
926	DBC **curslist;
927	u_int32_t flags;
928{
929	DB_TXN *txn;
930	ENV *env;
931	int i;
932
933	env = primary->env;
934
935	switch (flags) {
936	case 0:
937	case DB_JOIN_NOSORT:
938		break;
939	default:
940		return (__db_ferr(env, "DB->join", 0));
941	}
942
943	if (curslist == NULL || curslist[0] == NULL) {
944		__db_errx(env,
945	    "At least one secondary cursor must be specified to DB->join");
946		return (EINVAL);
947	}
948
949	txn = curslist[0]->txn;
950	for (i = 1; curslist[i] != NULL; i++)
951		if (curslist[i]->txn != txn) {
952			__db_errx(env,
953		    "All secondary cursors must share the same transaction");
954			return (EINVAL);
955		}
956
957	return (0);
958}
959
960/*
961 * __db_key_range_pp --
962 *	DB->key_range pre/post processing.
963 *
964 * PUBLIC: int __db_key_range_pp
965 * PUBLIC:     __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t));
966 */
967int
968__db_key_range_pp(dbp, txn, key, kr, flags)
969	DB *dbp;
970	DB_TXN *txn;
971	DBT *key;
972	DB_KEY_RANGE *kr;
973	u_int32_t flags;
974{
975	DBC *dbc;
976	DB_THREAD_INFO *ip;
977	ENV *env;
978	int handle_check, ret, t_ret;
979
980	env = dbp->env;
981
982	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->key_range");
983
984	/*
985	 * !!!
986	 * The actual argument checking is simple, do it inline, outside of
987	 * the replication block.
988	 */
989	if (flags != 0)
990		return (__db_ferr(env, "DB->key_range", 0));
991
992	ENV_ENTER(env, ip);
993
994	/* Check for replication block. */
995	handle_check = IS_ENV_REPLICATED(env);
996	if (handle_check &&
997	     (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
998		handle_check = 0;
999		goto err;
1000	}
1001
1002	/* Check for consistent transaction usage. */
1003	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
1004		goto err;
1005
1006	/*
1007	 * !!!
1008	 * The actual method call is simple, do it inline.
1009	 */
1010	switch (dbp->type) {
1011	case DB_BTREE:
1012#ifndef HAVE_BREW
1013		if ((ret = __dbt_usercopy(env, key)) != 0)
1014			goto err;
1015
1016		/* Acquire a cursor. */
1017		if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0)
1018			break;
1019
1020		DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0);
1021
1022		ret = __bam_key_range(dbc, key, kr, flags);
1023
1024		if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1025			ret = t_ret;
1026		__dbt_userfree(env, key, NULL, NULL);
1027		break;
1028#else
1029		COMPQUIET(dbc, NULL);
1030		COMPQUIET(key, NULL);
1031		COMPQUIET(kr, NULL);
1032		/* FALLTHROUGH */
1033#endif
1034	case DB_HASH:
1035	case DB_QUEUE:
1036	case DB_RECNO:
1037		ret = __dbh_am_chk(dbp, DB_OK_BTREE);
1038		break;
1039	case DB_UNKNOWN:
1040	default:
1041		ret = __db_unknown_type(env, "DB->key_range", dbp->type);
1042		break;
1043	}
1044
1045err:	/* Release replication block. */
1046	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1047		ret = t_ret;
1048
1049	ENV_LEAVE(env, ip);
1050	return (ret);
1051}
1052
1053/*
1054 * __db_open_pp --
1055 *	DB->open pre/post processing.
1056 *
1057 * PUBLIC: int __db_open_pp __P((DB *, DB_TXN *,
1058 * PUBLIC:     const char *, const char *, DBTYPE, u_int32_t, int));
1059 */
1060int
1061__db_open_pp(dbp, txn, fname, dname, type, flags, mode)
1062	DB *dbp;
1063	DB_TXN *txn;
1064	const char *fname, *dname;
1065	DBTYPE type;
1066	u_int32_t flags;
1067	int mode;
1068{
1069	DB_THREAD_INFO *ip;
1070	ENV *env;
1071	int handle_check, nosync, remove_me, ret, t_ret, txn_local;
1072
1073	env = dbp->env;
1074	nosync = 1;
1075	handle_check = remove_me = txn_local = 0;
1076
1077	ENV_ENTER(env, ip);
1078
1079	/*
1080	 * Save the file and database names and flags.  We do this here
1081	 * because we don't pass all of the flags down into the actual
1082	 * DB->open method call, we strip DB_AUTO_COMMIT at this layer.
1083	 */
1084	if ((fname != NULL &&
1085	    (ret = __os_strdup(env, fname, &dbp->fname)) != 0))
1086		goto err;
1087	if ((dname != NULL &&
1088	    (ret = __os_strdup(env, dname, &dbp->dname)) != 0))
1089		goto err;
1090	dbp->open_flags = flags;
1091
1092	/* Save the current DB handle flags for refresh. */
1093	dbp->orig_flags = dbp->flags;
1094
1095	/* Check for replication block. */
1096	handle_check = IS_ENV_REPLICATED(env);
1097	if (handle_check &&
1098	    (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
1099		handle_check = 0;
1100		goto err;
1101	}
1102
1103	/*
1104	 * Create local transaction as necessary, check for consistent
1105	 * transaction usage.
1106	 */
1107	if (IS_ENV_AUTO_COMMIT(env, txn, flags)) {
1108		if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0)
1109			goto err;
1110		txn_local = 1;
1111	} else if (txn != NULL && !TXN_ON(env) &&
1112	    (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_CDSGROUP))) {
1113		ret = __db_not_txn_env(env);
1114		goto err;
1115	}
1116	LF_CLR(DB_AUTO_COMMIT);
1117
1118	/*
1119	 * We check arguments after possibly creating a local transaction,
1120	 * which is unusual -- the reason is some flags are illegal if any
1121	 * kind of transaction is in effect.
1122	 */
1123	if ((ret = __db_open_arg(dbp, txn, fname, dname, type, flags)) == 0)
1124		if ((ret = __db_open(dbp, ip, txn, fname, dname, type,
1125		    flags, mode, PGNO_BASE_MD)) != 0)
1126			goto txnerr;
1127
1128	/*
1129	 * You can open the database that describes the subdatabases in the
1130	 * rest of the file read-only.  The content of each key's data is
1131	 * unspecified and applications should never be adding new records
1132	 * or updating existing records.  However, during recovery, we need
1133	 * to open these databases R/W so we can redo/undo changes in them.
1134	 * Likewise, we need to open master databases read/write during
1135	 * rename and remove so we can be sure they're fully sync'ed, so
1136	 * we provide an override flag for the purpose.
1137	 */
1138	if (dname == NULL && !IS_RECOVERING(env) && !LF_ISSET(DB_RDONLY) &&
1139	    !LF_ISSET(DB_RDWRMASTER) && F_ISSET(dbp, DB_AM_SUBDB)) {
1140		__db_errx(env,
1141    "files containing multiple databases may only be opened read-only");
1142		ret = EINVAL;
1143		goto txnerr;
1144	}
1145
1146	/*
1147	 * Success: file creations have to be synchronous, otherwise we don't
1148	 * care.
1149	 */
1150	if (F_ISSET(dbp, DB_AM_CREATED | DB_AM_CREATED_MSTR))
1151		nosync = 0;
1152
1153	/* Success: don't discard the file on close. */
1154	F_CLR(dbp, DB_AM_DISCARD | DB_AM_CREATED | DB_AM_CREATED_MSTR);
1155
1156	/*
1157	 * If not transactional, remove the databases/subdatabases if it is
1158	 * persistent.  If we're transactional, the child transaction abort
1159	 * cleans up.
1160	 */
1161txnerr:	if (ret != 0 && !IS_REAL_TXN(txn)) {
1162		remove_me = (F_ISSET(dbp, DB_AM_CREATED) &&
1163			(fname != NULL || dname != NULL)) ? 1 : 0;
1164		if (F_ISSET(dbp, DB_AM_CREATED_MSTR) ||
1165		    (dname == NULL && remove_me))
1166			/* Remove file. */
1167			(void)__db_remove_int(dbp,
1168			    ip, txn, fname, NULL, DB_FORCE);
1169		else if (remove_me)
1170			/* Remove subdatabase. */
1171			(void)__db_remove_int(dbp,
1172			    ip, txn, fname, dname, DB_FORCE);
1173	}
1174
1175	if (txn_local && (t_ret =
1176	     __db_txn_auto_resolve(env, txn, nosync, ret)) && ret == 0)
1177		ret = t_ret;
1178
1179err:	/* Release replication block. */
1180	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1181		ret = t_ret;
1182
1183	ENV_LEAVE(env, ip);
1184	return (ret);
1185}
1186
1187/*
1188 * __db_open_arg --
1189 *	Check DB->open arguments.
1190 */
1191static int
1192__db_open_arg(dbp, txn, fname, dname, type, flags)
1193	DB *dbp;
1194	DB_TXN *txn;
1195	const char *fname, *dname;
1196	DBTYPE type;
1197	u_int32_t flags;
1198{
1199	ENV *env;
1200	u_int32_t ok_flags;
1201	int ret;
1202
1203	env = dbp->env;
1204
1205	/* Validate arguments. */
1206#undef	OKFLAGS
1207#define	OKFLAGS								\
1208	(DB_AUTO_COMMIT | DB_CREATE | DB_EXCL | DB_FCNTL_LOCKING |	\
1209	DB_MULTIVERSION | DB_NOMMAP | DB_NO_AUTO_COMMIT | DB_RDONLY |	\
1210	DB_RDWRMASTER | DB_READ_UNCOMMITTED | DB_THREAD | DB_TRUNCATE)
1211	if ((ret = __db_fchk(env, "DB->open", flags, OKFLAGS)) != 0)
1212		return (ret);
1213	if (LF_ISSET(DB_EXCL) && !LF_ISSET(DB_CREATE))
1214		return (__db_ferr(env, "DB->open", 1));
1215	if (LF_ISSET(DB_RDONLY) && LF_ISSET(DB_CREATE))
1216		return (__db_ferr(env, "DB->open", 1));
1217
1218#ifdef	HAVE_VXWORKS
1219	if (LF_ISSET(DB_TRUNCATE)) {
1220		__db_errx(env, "DB_TRUNCATE not supported on VxWorks");
1221		return (DB_OPNOTSUP);
1222	}
1223#endif
1224	switch (type) {
1225	case DB_UNKNOWN:
1226		if (LF_ISSET(DB_CREATE|DB_TRUNCATE)) {
1227			__db_errx(env,
1228	    "DB_UNKNOWN type specified with DB_CREATE or DB_TRUNCATE");
1229			return (EINVAL);
1230		}
1231		ok_flags = 0;
1232		break;
1233	case DB_BTREE:
1234		ok_flags = DB_OK_BTREE;
1235		break;
1236	case DB_HASH:
1237#ifndef HAVE_HASH
1238		return (__db_no_hash_am(env));
1239#endif
1240		ok_flags = DB_OK_HASH;
1241		break;
1242	case DB_QUEUE:
1243#ifndef HAVE_QUEUE
1244		return (__db_no_queue_am(env));
1245#endif
1246		ok_flags = DB_OK_QUEUE;
1247		break;
1248	case DB_RECNO:
1249		ok_flags = DB_OK_RECNO;
1250		break;
1251	default:
1252		__db_errx(env, "unknown type: %lu", (u_long)type);
1253		return (EINVAL);
1254	}
1255	if (ok_flags)
1256		DB_ILLEGAL_METHOD(dbp, ok_flags);
1257
1258	/* The environment may have been created, but never opened. */
1259	if (!F_ISSET(env, ENV_DBLOCAL | ENV_OPEN_CALLED)) {
1260		__db_errx(env, "database environment not yet opened");
1261		return (EINVAL);
1262	}
1263
1264	/*
1265	 * Historically, you could pass in an environment that didn't have a
1266	 * mpool, and DB would create a private one behind the scenes.  This
1267	 * no longer works.
1268	 */
1269	if (!F_ISSET(env, ENV_DBLOCAL) && !MPOOL_ON(env)) {
1270		__db_errx(env, "environment did not include a memory pool");
1271		return (EINVAL);
1272	}
1273
1274	/*
1275	 * You can't specify threads during DB->open if subsystems in the
1276	 * environment weren't configured with them.
1277	 */
1278	if (LF_ISSET(DB_THREAD) && !F_ISSET(env, ENV_DBLOCAL | ENV_THREAD)) {
1279		__db_errx(env, "environment not created using DB_THREAD");
1280		return (EINVAL);
1281	}
1282
1283	/* DB_MULTIVERSION requires a database configured for transactions. */
1284	if (LF_ISSET(DB_MULTIVERSION) && !IS_REAL_TXN(txn)) {
1285		__db_errx(env,
1286		    "DB_MULTIVERSION illegal without a transaction specified");
1287		return (EINVAL);
1288	}
1289
1290	if (LF_ISSET(DB_MULTIVERSION) && type == DB_QUEUE) {
1291		__db_errx(env,
1292		    "DB_MULTIVERSION illegal with queue databases");
1293		return (EINVAL);
1294	}
1295
1296	/* DB_TRUNCATE is neither transaction recoverable nor lockable. */
1297	if (LF_ISSET(DB_TRUNCATE) && (LOCKING_ON(env) || txn != NULL)) {
1298		__db_errx(env,
1299		    "DB_TRUNCATE illegal with %s specified",
1300		    LOCKING_ON(env) ? "locking" : "transactions");
1301		return (EINVAL);
1302	}
1303
1304	/* Subdatabase checks. */
1305	if (dname != NULL) {
1306		/* QAM can only be done on in-memory subdatabases. */
1307		if (type == DB_QUEUE && fname != NULL) {
1308			__db_errx(
1309			    env, "Queue databases must be one-per-file");
1310			return (EINVAL);
1311		}
1312
1313		/*
1314		 * Named in-memory databases can't support certain flags,
1315		 * so check here.
1316		 */
1317		if (fname == NULL)
1318			F_CLR(dbp, DB_AM_CHKSUM | DB_AM_ENCRYPT);
1319	}
1320
1321	return (0);
1322}
1323
1324/*
1325 * __db_pget_pp --
1326 *	DB->pget pre/post processing.
1327 *
1328 * PUBLIC: int __db_pget_pp
1329 * PUBLIC:     __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
1330 */
1331int
1332__db_pget_pp(dbp, txn, skey, pkey, data, flags)
1333	DB *dbp;
1334	DB_TXN *txn;
1335	DBT *skey, *pkey, *data;
1336	u_int32_t flags;
1337{
1338	DB_THREAD_INFO *ip;
1339	ENV *env;
1340	int handle_check, ignore_lease, ret, t_ret;
1341
1342	env = dbp->env;
1343
1344	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->pget");
1345
1346	ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
1347	LF_CLR(DB_IGNORE_LEASE);
1348
1349	if ((ret = __db_pget_arg(dbp, pkey, flags)) != 0 ||
1350	    (ret = __db_get_arg(dbp, skey, data, flags)) != 0) {
1351		__dbt_userfree(env, skey, pkey, data);
1352		return (ret);
1353	}
1354
1355	ENV_ENTER(env, ip);
1356
1357	/* Check for replication block. */
1358	handle_check = IS_ENV_REPLICATED(env);
1359	if (handle_check &&
1360	    (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
1361		handle_check = 0;
1362		goto err;
1363	}
1364
1365	ret = __db_pget(dbp, ip, txn, skey, pkey, data, flags);
1366	/*
1367	 * Check for master leases.
1368	 */
1369	if (ret == 0 &&
1370	    IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
1371		ret = __rep_lease_check(env, 1);
1372
1373err:	/* Release replication block. */
1374	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1375		ret = t_ret;
1376
1377	ENV_LEAVE(env, ip);
1378	__dbt_userfree(env, skey, pkey, data);
1379	return (ret);
1380}
1381
1382/*
1383 * __db_pget --
1384 *	DB->pget.
1385 *
1386 * PUBLIC: int __db_pget __P((DB *,
1387 * PUBLIC:     DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
1388 */
1389int
1390__db_pget(dbp, ip, txn, skey, pkey, data, flags)
1391	DB *dbp;
1392	DB_THREAD_INFO *ip;
1393	DB_TXN *txn;
1394	DBT *skey, *pkey, *data;
1395	u_int32_t flags;
1396{
1397	DBC *dbc;
1398	u_int32_t mode;
1399	int ret, t_ret;
1400
1401	if (LF_ISSET(DB_READ_UNCOMMITTED)) {
1402		mode = DB_READ_UNCOMMITTED;
1403		LF_CLR(DB_READ_UNCOMMITTED);
1404	} else if (LF_ISSET(DB_READ_COMMITTED)) {
1405		mode = DB_READ_COMMITTED;
1406		LF_CLR(DB_READ_COMMITTED);
1407	} else
1408		mode = 0;
1409
1410	if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0)
1411		return (ret);
1412
1413	SET_RET_MEM(dbc, dbp);
1414
1415	DEBUG_LREAD(dbc, txn, "__db_pget", skey, NULL, flags);
1416
1417	/*
1418	 * !!!
1419	 * The actual method call is simple, do it inline.
1420	 *
1421	 * The underlying cursor pget will fill in a default DBT for null
1422	 * pkeys, and use the cursor's returned-key memory internally to
1423	 * store any intermediate primary keys.  However, we've just set
1424	 * the returned-key memory to the DB handle's key memory, which
1425	 * is unsafe to use if the DB handle is threaded.  If the pkey
1426	 * argument is NULL, use the DBC-owned returned-key memory
1427	 * instead;  it'll go away when we close the cursor before we
1428	 * return, but in this case that's just fine, as we're not
1429	 * returning the primary key.
1430	 */
1431	if (pkey == NULL)
1432		dbc->rkey = &dbc->my_rkey;
1433
1434	/*
1435	 * The cursor is just a perfectly ordinary secondary database cursor.
1436	 * Call its c_pget() method to do the dirty work.
1437	 */
1438	if (flags == 0 || flags == DB_RMW)
1439		flags |= DB_SET;
1440
1441	ret = __dbc_pget(dbc, skey, pkey, data, flags);
1442
1443	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1444		ret = t_ret;
1445
1446	return (ret);
1447}
1448
1449/*
1450 * __db_pget_arg --
1451 *	Check DB->pget arguments.
1452 */
1453static int
1454__db_pget_arg(dbp, pkey, flags)
1455	DB *dbp;
1456	DBT *pkey;
1457	u_int32_t flags;
1458{
1459	ENV *env;
1460	int ret;
1461
1462	env = dbp->env;
1463
1464	if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
1465		__db_errx(env,
1466		    "DB->pget may only be used on secondary indices");
1467		return (EINVAL);
1468	}
1469
1470	if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
1471		__db_errx(env,
1472	"DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices");
1473		return (EINVAL);
1474	}
1475
1476	/* DB_CONSUME makes no sense on a secondary index. */
1477	LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
1478	switch (flags) {
1479	case DB_CONSUME:
1480	case DB_CONSUME_WAIT:
1481		return (__db_ferr(env, "DB->pget", 0));
1482	default:
1483		/* __db_get_arg will catch the rest. */
1484		break;
1485	}
1486
1487	/*
1488	 * We allow the pkey field to be NULL, so that we can make the
1489	 * two-DBT get calls into wrappers for the three-DBT ones.
1490	 */
1491	if (pkey != NULL &&
1492	    (ret = __dbt_ferr(dbp, "primary key", pkey, 1)) != 0)
1493		return (ret);
1494
1495	if (flags == DB_GET_BOTH) {
1496		/* The pkey field can't be NULL if we're doing a DB_GET_BOTH. */
1497		if (pkey == NULL) {
1498			__db_errx(env,
1499		    "DB_GET_BOTH on a secondary index requires a primary key");
1500			return (EINVAL);
1501		}
1502		if ((ret = __dbt_usercopy(env, pkey)) != 0)
1503			return (ret);
1504	}
1505
1506	return (0);
1507}
1508
1509/*
1510 * __db_put_pp --
1511 *	DB->put pre/post processing.
1512 *
1513 * PUBLIC: int __db_put_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
1514 */
1515int
1516__db_put_pp(dbp, txn, key, data, flags)
1517	DB *dbp;
1518	DB_TXN *txn;
1519	DBT *key, *data;
1520	u_int32_t flags;
1521{
1522	DB_THREAD_INFO *ip;
1523	ENV *env;
1524	int handle_check, ret, txn_local, t_ret;
1525
1526	env = dbp->env;
1527	txn_local = 0;
1528
1529	STRIP_AUTO_COMMIT(flags);
1530	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put");
1531
1532	if ((ret = __db_put_arg(dbp, key, data, flags)) != 0)
1533		return (ret);
1534
1535	ENV_ENTER(env, ip);
1536
1537	/* Check for replication block. */
1538	handle_check = IS_ENV_REPLICATED(env);
1539	if (handle_check &&
1540	    (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
1541		handle_check = 0;
1542		goto err;
1543	}
1544
1545	/* Create local transaction as necessary. */
1546	if (IS_DB_AUTO_COMMIT(dbp, txn)) {
1547		if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
1548			goto err;
1549		txn_local = 1;
1550	}
1551
1552	/* Check for consistent transaction usage. */
1553	if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
1554		goto err;
1555
1556	ret = __db_put(dbp, ip, txn, key, data, flags);
1557
1558err:	if (txn_local &&
1559	    (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
1560		ret = t_ret;
1561
1562	/* Release replication block. */
1563	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1564		ret = t_ret;
1565
1566	ENV_LEAVE(env, ip);
1567	__dbt_userfree(env, key, NULL, data);
1568	return (ret);
1569}
1570
1571/*
1572 * __db_put_arg --
1573 *	Check DB->put arguments.
1574 */
1575static int
1576__db_put_arg(dbp, key, data, flags)
1577	DB *dbp;
1578	DBT *key, *data;
1579	u_int32_t flags;
1580{
1581	ENV *env;
1582	int ret, returnkey;
1583
1584	env = dbp->env;
1585	returnkey = 0;
1586
1587	/* Check for changes to a read-only tree. */
1588	if (DB_IS_READONLY(dbp))
1589		return (__db_rdonly(env, "DB->put"));
1590
1591	/* Check for puts on a secondary. */
1592	if (F_ISSET(dbp, DB_AM_SECONDARY)) {
1593		__db_errx(env, "DB->put forbidden on secondary indices");
1594		return (EINVAL);
1595	}
1596
1597	/* Check for invalid function flags. */
1598	switch (flags) {
1599	case 0:
1600	case DB_NOOVERWRITE:
1601		break;
1602	case DB_APPEND:
1603		if (dbp->type != DB_RECNO && dbp->type != DB_QUEUE)
1604			goto err;
1605		returnkey = 1;
1606		break;
1607	case DB_NODUPDATA:
1608		if (F_ISSET(dbp, DB_AM_DUPSORT))
1609			break;
1610		/* FALLTHROUGH */
1611	default:
1612err:		return (__db_ferr(env, "DB->put", 0));
1613	}
1614
1615	/*
1616	 * Check for invalid key/data flags.  The key may reasonably be NULL
1617	 * if DB_APPEND is set and the application doesn't care about the
1618	 * returned key.
1619	 */
1620	if (((returnkey && key != NULL) || !returnkey) &&
1621	    (ret = __dbt_ferr(dbp, "key", key, returnkey)) != 0)
1622		return (ret);
1623	if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
1624		return (ret);
1625
1626	/*
1627	 * The key parameter should not be NULL or have the "partial" flag set
1628	 * in a put call unless the user doesn't care about a key value we'd
1629	 * return.  The user tells us they don't care about the returned key by
1630	 * setting the key parameter to NULL or configuring the key DBT to not
1631	 * return any information.  (Returned keys from a put are always record
1632	 * numbers, and returning part of a record number  doesn't make sense:
1633	 * only accept a partial return if the length returned is 0.)
1634	 */
1635	if ((returnkey &&
1636	    key != NULL && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) ||
1637	    (!returnkey && F_ISSET(key, DB_DBT_PARTIAL)))
1638		return (__db_ferr(env, "key DBT", 0));
1639
1640	/* Check for partial puts in the presence of duplicates. */
1641	if (F_ISSET(data, DB_DBT_PARTIAL) &&
1642	    (F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))) {
1643		__db_errx(env,
1644"a partial put in the presence of duplicates requires a cursor operation");
1645		return (EINVAL);
1646	}
1647
1648	if ((flags != DB_APPEND && (ret = __dbt_usercopy(env, key)) != 0) ||
1649	    (ret = __dbt_usercopy(env, data)) != 0)
1650		return (ret);
1651
1652	return (0);
1653}
1654
1655/*
1656 * __db_compact_pp --
1657 *	DB->compact pre/post processing.
1658 *
1659 * PUBLIC: int __db_compact_pp __P((DB *, DB_TXN *,
1660 * PUBLIC:       DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
1661 */
1662int
1663__db_compact_pp(dbp, txn, start, stop, c_data, flags, end)
1664	DB *dbp;
1665	DB_TXN *txn;
1666	DBT *start, *stop;
1667	DB_COMPACT *c_data;
1668	u_int32_t flags;
1669	DBT *end;
1670{
1671	DB_COMPACT *dp, l_data;
1672	DB_THREAD_INFO *ip;
1673	ENV *env;
1674	int handle_check, ret, t_ret;
1675
1676	env = dbp->env;
1677
1678	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->compact");
1679
1680	/*
1681	 * !!!
1682	 * The actual argument checking is simple, do it inline, outside of
1683	 * the replication block.
1684	 */
1685	if ((ret = __db_fchk(
1686	    env, "DB->compact", flags, DB_FREELIST_ONLY | DB_FREE_SPACE)) != 0)
1687		return (ret);
1688
1689	/* Check for changes to a read-only database. */
1690	if (DB_IS_READONLY(dbp))
1691		return (__db_rdonly(env, "DB->compact"));
1692
1693	if (start != NULL && (ret = __dbt_usercopy(env, start)) != 0)
1694		return (ret);
1695	if (stop != NULL && (ret = __dbt_usercopy(env, stop)) != 0)
1696		return (ret);
1697
1698	ENV_ENTER(env, ip);
1699
1700	/* Check for replication block. */
1701	handle_check = IS_ENV_REPLICATED(env);
1702	if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) {
1703		handle_check = 0;
1704		goto err;
1705	}
1706
1707	if (c_data == NULL) {
1708		dp = &l_data;
1709		memset(dp, 0, sizeof(*dp));
1710	} else
1711		dp = c_data;
1712
1713	switch (dbp->type) {
1714	case DB_HASH:
1715		if (!LF_ISSET(DB_FREELIST_ONLY))
1716			goto err;
1717		/* FALLTHROUGH */
1718	case DB_BTREE:
1719	case DB_RECNO:
1720		ret = __bam_compact(dbp, ip, txn, start, stop, dp, flags, end);
1721		break;
1722
1723	default:
1724err:		ret = __dbh_am_chk(dbp, DB_OK_BTREE);
1725		break;
1726	}
1727
1728	/* Release replication block. */
1729	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1730		ret = t_ret;
1731
1732	ENV_LEAVE(env, ip);
1733	__dbt_userfree(env, start, stop, NULL);
1734	return (ret);
1735}
1736
1737/*
1738 * __db_associate_foreign_pp --
1739 *	DB->associate_foreign pre/post processing.
1740 *
1741 * PUBLIC: int __db_associate_foreign_pp __P((DB *, DB *,
1742 * PUBLIC:     int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
1743 * PUBLIC:     u_int32_t));
1744 */
1745int
1746__db_associate_foreign_pp(fdbp, dbp, callback, flags)
1747	DB *dbp, *fdbp;
1748	int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *));
1749	u_int32_t flags;
1750{
1751	/* Most of this is based on the implementation of associate */
1752	DB_THREAD_INFO *ip;
1753	ENV *env;
1754	int handle_check, ret, t_ret;
1755
1756	env = dbp->env;
1757
1758	PANIC_CHECK(env);
1759	STRIP_AUTO_COMMIT(flags);
1760
1761	ENV_ENTER(env, ip);
1762
1763	/* Check for replication block. */
1764	handle_check = IS_ENV_REPLICATED(env);
1765	if (handle_check &&
1766	    (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) {
1767		handle_check = 0;
1768		goto err;
1769	}
1770
1771	if ((ret = __db_associate_foreign_arg(fdbp, dbp, callback, flags)) != 0)
1772		goto err;
1773
1774	ret = __db_associate_foreign(fdbp, dbp, callback, flags);
1775
1776err:	/* Release replication block. */
1777	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1778		ret = t_ret;
1779	ENV_LEAVE(env, ip);
1780	return (ret);
1781}
1782
1783/*
1784 * __db_associate_foreign_arg --
1785 *	DB->associate_foreign argument checking.
1786 */
1787static int
1788__db_associate_foreign_arg(fdbp, dbp, callback, flags)
1789	DB *dbp, *fdbp;
1790	int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *));
1791	u_int32_t flags;
1792{
1793	ENV *env;
1794
1795	env = fdbp->env;
1796
1797	if (F_ISSET(fdbp, DB_AM_SECONDARY)) {
1798		__db_errx(env,
1799		    "Secondary indices may not be used as foreign databases");
1800		return (EINVAL);
1801	}
1802	if (F_ISSET(fdbp, DB_AM_DUP)) {
1803		__db_errx(env,
1804		    "Foreign databases may not be configured with duplicates");
1805		return (EINVAL);
1806	}
1807	if (F_ISSET(fdbp, DB_AM_RENUMBER)) {
1808		__db_errx(env,
1809	    "Renumbering recno databases may not be used as foreign databases");
1810		return (EINVAL);
1811	}
1812	if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
1813		__db_errx(env,
1814		    "The associating database must be a secondary index.");
1815		return (EINVAL);
1816	}
1817	if (LF_ISSET(DB_FOREIGN_NULLIFY) && callback == NULL) {
1818		__db_errx(env,
1819	"When specifying a delete action of nullify, a callback function needs to be configured");
1820		return (EINVAL);
1821	} else if (!LF_ISSET(DB_FOREIGN_NULLIFY) && callback != NULL) {
1822		__db_errx(env,
1823	"When not specifying a delete action of nullify, a callback function cannot be configured");
1824		return (EINVAL);
1825	}
1826
1827	return (0);
1828}
1829
1830/*
1831 * __db_sync_pp --
1832 *	DB->sync pre/post processing.
1833 *
1834 * PUBLIC: int __db_sync_pp __P((DB *, u_int32_t));
1835 */
1836int
1837__db_sync_pp(dbp, flags)
1838	DB *dbp;
1839	u_int32_t flags;
1840{
1841	DB_THREAD_INFO *ip;
1842	ENV *env;
1843	int handle_check, ret, t_ret;
1844
1845	env = dbp->env;
1846
1847	DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync");
1848
1849	/*
1850	 * !!!
1851	 * The actual argument checking is simple, do it inline, outside of
1852	 * the replication block.
1853	 */
1854	if (flags != 0)
1855		return (__db_ferr(env, "DB->sync", 0));
1856
1857	ENV_ENTER(env, ip);
1858
1859	/* Check for replication block. */
1860	handle_check = IS_ENV_REPLICATED(env);
1861	if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) {
1862		handle_check = 0;
1863		goto err;
1864	}
1865
1866	ret = __db_sync(dbp);
1867
1868	/* Release replication block. */
1869	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
1870		ret = t_ret;
1871
1872err:	ENV_LEAVE(env, ip);
1873	return (ret);
1874}
1875
1876/*
1877 * __dbc_close_pp --
1878 *	DBC->close pre/post processing.
1879 *
1880 * PUBLIC: int __dbc_close_pp __P((DBC *));
1881 */
1882int
1883__dbc_close_pp(dbc)
1884	DBC *dbc;
1885{
1886	DB *dbp;
1887	DB_THREAD_INFO *ip;
1888	ENV *env;
1889	int handle_check, ret, t_ret;
1890
1891	dbp = dbc->dbp;
1892	env = dbp->env;
1893
1894	/*
1895	 * If the cursor is already closed we have a serious problem, and we
1896	 * assume that the cursor isn't on the active queue.  Don't do any of
1897	 * the remaining cursor close processing.
1898	 */
1899	if (!F_ISSET(dbc, DBC_ACTIVE)) {
1900		__db_errx(env, "Closing already-closed cursor");
1901		return (EINVAL);
1902	}
1903
1904	ENV_ENTER(env, ip);
1905
1906	/* Check for replication block. */
1907	handle_check = dbc->txn == NULL && IS_ENV_REPLICATED(env);
1908	ret = __dbc_close(dbc);
1909
1910	/* Release replication block. */
1911	if (handle_check &&
1912	    (t_ret = __op_rep_exit(env)) != 0 && ret == 0)
1913		ret = t_ret;
1914
1915	ENV_LEAVE(env, ip);
1916	return (ret);
1917}
1918
1919/*
1920 * __dbc_count_pp --
1921 *	DBC->count pre/post processing.
1922 *
1923 * PUBLIC: int __dbc_count_pp __P((DBC *, db_recno_t *, u_int32_t));
1924 */
1925int
1926__dbc_count_pp(dbc, recnop, flags)
1927	DBC *dbc;
1928	db_recno_t *recnop;
1929	u_int32_t flags;
1930{
1931	DB *dbp;
1932	DB_THREAD_INFO *ip;
1933	ENV *env;
1934	int ret;
1935
1936	dbp = dbc->dbp;
1937	env = dbp->env;
1938
1939	/*
1940	 * !!!
1941	 * The actual argument checking is simple, do it inline, outside of
1942	 * the replication block.
1943	 *
1944	 * The cursor must be initialized, return EINVAL for an invalid cursor.
1945	 */
1946	if (flags != 0)
1947		return (__db_ferr(env, "DBcursor->count", 0));
1948
1949	if (!IS_INITIALIZED(dbc))
1950		return (__db_curinval(env));
1951
1952	ENV_ENTER(env, ip);
1953	ret = __dbc_count(dbc, recnop);
1954	ENV_LEAVE(env, ip);
1955	return (ret);
1956}
1957
1958/*
1959 * __dbc_del_pp --
1960 *	DBC->del pre/post processing.
1961 *
1962 * PUBLIC: int __dbc_del_pp __P((DBC *, u_int32_t));
1963 */
1964int
1965__dbc_del_pp(dbc, flags)
1966	DBC *dbc;
1967	u_int32_t flags;
1968{
1969	DB *dbp;
1970	DB_THREAD_INFO *ip;
1971	ENV *env;
1972	int ret;
1973
1974	dbp = dbc->dbp;
1975	env = dbp->env;
1976
1977	if ((ret = __dbc_del_arg(dbc, flags)) != 0)
1978		return (ret);
1979
1980	ENV_ENTER(env, ip);
1981
1982	/* Check for consistent transaction usage. */
1983	if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
1984		goto err;
1985
1986	DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->del", NULL, NULL, flags);
1987	ret = __dbc_del(dbc, flags);
1988
1989err:	ENV_LEAVE(env, ip);
1990	return (ret);
1991}
1992
1993/*
1994 * __dbc_del_arg --
1995 *	Check DBC->del arguments.
1996 */
1997static int
1998__dbc_del_arg(dbc, flags)
1999	DBC *dbc;
2000	u_int32_t flags;
2001{
2002	DB *dbp;
2003	ENV *env;
2004
2005	dbp = dbc->dbp;
2006	env = dbp->env;
2007
2008	/* Check for changes to a read-only tree. */
2009	if (DB_IS_READONLY(dbp))
2010		return (__db_rdonly(env, "DBcursor->del"));
2011
2012	/* Check for invalid function flags. */
2013	switch (flags) {
2014	case 0:
2015		break;
2016	case DB_UPDATE_SECONDARY:
2017		DB_ASSERT(env, F_ISSET(dbp, DB_AM_SECONDARY));
2018		break;
2019	default:
2020		return (__db_ferr(env, "DBcursor->del", 0));
2021	}
2022
2023	/*
2024	 * The cursor must be initialized, return EINVAL for an invalid cursor,
2025	 * otherwise 0.
2026	 */
2027	if (!IS_INITIALIZED(dbc))
2028		return (__db_curinval(env));
2029
2030	return (0);
2031}
2032
2033/*
2034 * __dbc_dup_pp --
2035 *	DBC->dup pre/post processing.
2036 *
2037 * PUBLIC: int __dbc_dup_pp __P((DBC *, DBC **, u_int32_t));
2038 */
2039int
2040__dbc_dup_pp(dbc, dbcp, flags)
2041	DBC *dbc, **dbcp;
2042	u_int32_t flags;
2043{
2044	DB *dbp;
2045	DB_THREAD_INFO *ip;
2046	ENV *env;
2047	int ret;
2048
2049	dbp = dbc->dbp;
2050	env = dbp->env;
2051
2052	/*
2053	 * !!!
2054	 * The actual argument checking is simple, do it inline, outside of
2055	 * the replication block.
2056	 */
2057	if (flags != 0 && flags != DB_POSITION)
2058		return (__db_ferr(env, "DBcursor->dup", 0));
2059
2060	ENV_ENTER(env, ip);
2061	ret = __dbc_dup(dbc, dbcp, flags);
2062	ENV_LEAVE(env, ip);
2063	return (ret);
2064}
2065
2066/*
2067 * __dbc_get_pp --
2068 *	DBC->get pre/post processing.
2069 *
2070 * PUBLIC: int __dbc_get_pp __P((DBC *, DBT *, DBT *, u_int32_t));
2071 */
2072int
2073__dbc_get_pp(dbc, key, data, flags)
2074	DBC *dbc;
2075	DBT *key, *data;
2076	u_int32_t flags;
2077{
2078	DB *dbp;
2079	DB_THREAD_INFO *ip;
2080	ENV *env;
2081	int ignore_lease, ret;
2082
2083	dbp = dbc->dbp;
2084	env = dbp->env;
2085
2086	ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
2087	LF_CLR(DB_IGNORE_LEASE);
2088	if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0)
2089		return (ret);
2090
2091	ENV_ENTER(env, ip);
2092
2093	DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get",
2094	    flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
2095	ret = __dbc_get(dbc, key, data, flags);
2096
2097	/*
2098	 * Check for master leases.
2099	 */
2100	if (ret == 0 &&
2101	    IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
2102		ret = __rep_lease_check(env, 1);
2103
2104	ENV_LEAVE(env, ip);
2105	__dbt_userfree(env, key, NULL, data);
2106	return (ret);
2107}
2108
2109/*
2110 * __dbc_get_arg --
2111 *	Common DBC->get argument checking, used by both DBC->get and DBC->pget.
2112 */
2113static int
2114__dbc_get_arg(dbc, key, data, flags)
2115	DBC *dbc;
2116	DBT *key, *data;
2117	u_int32_t flags;
2118{
2119	DB *dbp;
2120	ENV *env;
2121	int dirty, multi, ret;
2122
2123	dbp = dbc->dbp;
2124	env = dbp->env;
2125
2126	/*
2127	 * Typically in checking routines that modify the flags, we have
2128	 * to save them and restore them, because the checking routine
2129	 * calls the work routine.  However, this is a pure-checking
2130	 * routine which returns to a function that calls the work routine,
2131	 * so it's OK that we do not save and restore the flags, even though
2132	 * we modify them.
2133	 *
2134	 * Check for read-modify-write validity.  DB_RMW doesn't make sense
2135	 * with CDB cursors since if you're going to write the cursor, you
2136	 * had to create it with DB_WRITECURSOR.  Regardless, we check for
2137	 * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it.
2138	 * If this changes, confirm that DB does not itself set the DB_RMW
2139	 * flag in a path where CDB may have been configured.
2140	 */
2141	dirty = 0;
2142	if (LF_ISSET(DB_READ_UNCOMMITTED | DB_RMW)) {
2143		if (!LOCKING_ON(env))
2144			return (__db_fnl(env, "DBcursor->get"));
2145		if (LF_ISSET(DB_READ_UNCOMMITTED))
2146			dirty = 1;
2147		LF_CLR(DB_READ_UNCOMMITTED | DB_RMW);
2148	}
2149
2150	multi = 0;
2151	if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
2152		multi = 1;
2153		if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY))
2154			goto multi_err;
2155		LF_CLR(DB_MULTIPLE | DB_MULTIPLE_KEY);
2156	}
2157
2158	/* Check for invalid function flags. */
2159	switch (flags) {
2160	case DB_CONSUME:
2161	case DB_CONSUME_WAIT:
2162		if (dirty) {
2163			__db_errx(env,
2164    "DB_READ_UNCOMMITTED is not supported with DB_CONSUME or DB_CONSUME_WAIT");
2165			return (EINVAL);
2166		}
2167		if (dbp->type != DB_QUEUE)
2168			goto err;
2169		break;
2170	case DB_CURRENT:
2171	case DB_FIRST:
2172	case DB_NEXT:
2173	case DB_NEXT_DUP:
2174	case DB_NEXT_NODUP:
2175		break;
2176	case DB_LAST:
2177	case DB_PREV:
2178	case DB_PREV_DUP:
2179	case DB_PREV_NODUP:
2180		if (multi)
2181multi_err:		return (__db_ferr(env, "DBcursor->get", 1));
2182		break;
2183	case DB_GET_BOTHC:
2184		if (dbp->type == DB_QUEUE)
2185			goto err;
2186		/* FALLTHROUGH */
2187	case DB_GET_BOTH:
2188	case DB_GET_BOTH_RANGE:
2189		if ((ret = __dbt_usercopy(env, data)) != 0)
2190			goto err;
2191		/* FALLTHROUGH */
2192	case DB_SET:
2193	case DB_SET_RANGE:
2194		if ((ret = __dbt_usercopy(env, key)) != 0)
2195			goto err;
2196		break;
2197	case DB_GET_RECNO:
2198		/*
2199		 * The one situation in which this might be legal with a
2200		 * non-RECNUM dbp is if dbp is a secondary and its primary is
2201		 * DB_AM_RECNUM.
2202		 */
2203		if (!F_ISSET(dbp, DB_AM_RECNUM) &&
2204		    (!F_ISSET(dbp, DB_AM_SECONDARY) ||
2205		    !F_ISSET(dbp->s_primary, DB_AM_RECNUM)))
2206			goto err;
2207		break;
2208	case DB_SET_RECNO:
2209		if (!F_ISSET(dbp, DB_AM_RECNUM))
2210			goto err;
2211		if ((ret = __dbt_usercopy(env, key)) != 0)
2212			goto err;
2213		break;
2214	default:
2215err:		__dbt_userfree(env, key, NULL, data);
2216		return (__db_ferr(env, "DBcursor->get", 0));
2217	}
2218
2219	/* Check for invalid key/data flags. */
2220	if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
2221		return (ret);
2222	if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
2223		return (ret);
2224
2225	if (multi) {
2226		if (!F_ISSET(data, DB_DBT_USERMEM)) {
2227			__db_errx(env,
2228	    "DB_MULTIPLE/DB_MULTIPLE_KEY require DB_DBT_USERMEM be set");
2229			return (EINVAL);
2230		}
2231		if (F_ISSET(key, DB_DBT_PARTIAL) ||
2232		    F_ISSET(data, DB_DBT_PARTIAL)) {
2233			__db_errx(env,
2234	    "DB_MULTIPLE/DB_MULTIPLE_KEY do not support DB_DBT_PARTIAL");
2235			return (EINVAL);
2236		}
2237		if (data->ulen < 1024 ||
2238		    data->ulen < dbp->pgsize || data->ulen % 1024 != 0) {
2239			__db_errx(env, "%s%s",
2240			    "DB_MULTIPLE/DB_MULTIPLE_KEY buffers must be ",
2241			    "aligned, at least page size and multiples of 1KB");
2242			return (EINVAL);
2243		}
2244	}
2245
2246	/*
2247	 * The cursor must be initialized for DB_CURRENT, DB_GET_RECNO,
2248	 * DB_PREV_DUP and DB_NEXT_DUP.  Return EINVAL for an invalid
2249	 * cursor, otherwise 0.
2250	 */
2251	if (!IS_INITIALIZED(dbc) && (flags == DB_CURRENT ||
2252	    flags == DB_GET_RECNO ||
2253	    flags == DB_NEXT_DUP || flags == DB_PREV_DUP))
2254		return (__db_curinval(env));
2255
2256	/* Check for consistent transaction usage. */
2257	if (LF_ISSET(DB_RMW) &&
2258	    (ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
2259		return (ret);
2260
2261	return (0);
2262}
2263
2264/*
2265 * __db_secondary_close_pp --
2266 *	DB->close for secondaries
2267 *
2268 * PUBLIC: int __db_secondary_close_pp __P((DB *, u_int32_t));
2269 */
2270int
2271__db_secondary_close_pp(dbp, flags)
2272	DB *dbp;
2273	u_int32_t flags;
2274{
2275	DB_THREAD_INFO *ip;
2276	ENV *env;
2277	int handle_check, ret, t_ret;
2278
2279	env = dbp->env;
2280	ret = 0;
2281
2282	/*
2283	 * As a DB handle destructor, we can't fail.
2284	 *
2285	 * !!!
2286	 * The actual argument checking is simple, do it inline, outside of
2287	 * the replication block.
2288	 */
2289	if (flags != 0 && flags != DB_NOSYNC)
2290		ret = __db_ferr(env, "DB->close", 0);
2291
2292	ENV_ENTER(env, ip);
2293
2294	/* Check for replication block. */
2295	handle_check = IS_ENV_REPLICATED(env);
2296	if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) {
2297		handle_check = 0;
2298		if (ret == 0)
2299			ret = t_ret;
2300	}
2301
2302	if ((t_ret = __db_secondary_close(dbp, flags)) != 0 && ret == 0)
2303		ret = t_ret;
2304
2305	/* Release replication block. */
2306	if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
2307		ret = t_ret;
2308
2309	ENV_LEAVE(env, ip);
2310	return (ret);
2311}
2312
2313/*
2314 * __dbc_pget_pp --
2315 *	DBC->pget pre/post processing.
2316 *
2317 * PUBLIC: int __dbc_pget_pp __P((DBC *, DBT *, DBT *, DBT *, u_int32_t));
2318 */
2319int
2320__dbc_pget_pp(dbc, skey, pkey, data, flags)
2321	DBC *dbc;
2322	DBT *skey, *pkey, *data;
2323	u_int32_t flags;
2324{
2325	DB *dbp;
2326	DB_THREAD_INFO *ip;
2327	ENV *env;
2328	int ignore_lease, ret;
2329
2330	dbp = dbc->dbp;
2331	env = dbp->env;
2332
2333	ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
2334	LF_CLR(DB_IGNORE_LEASE);
2335	if ((ret = __dbc_pget_arg(dbc, pkey, flags)) != 0 ||
2336	    (ret = __dbc_get_arg(dbc, skey, data, flags)) != 0)
2337		return (ret);
2338
2339	ENV_ENTER(env, ip);
2340	ret = __dbc_pget(dbc, skey, pkey, data, flags);
2341	/*
2342	 * Check for master leases.
2343	 */
2344	if (ret == 0 &&
2345	    IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
2346		ret = __rep_lease_check(env, 1);
2347
2348	ENV_LEAVE(env, ip);
2349
2350	__dbt_userfree(env, skey, pkey, data);
2351	return (ret);
2352}
2353
2354/*
2355 * __dbc_pget_arg --
2356 *	Check DBC->pget arguments.
2357 */
2358static int
2359__dbc_pget_arg(dbc, pkey, flags)
2360	DBC *dbc;
2361	DBT *pkey;
2362	u_int32_t flags;
2363{
2364	DB *dbp;
2365	ENV *env;
2366	int ret;
2367
2368	dbp = dbc->dbp;
2369	env = dbp->env;
2370
2371	if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
2372		__db_errx(env,
2373		    "DBcursor->pget may only be used on secondary indices");
2374		return (EINVAL);
2375	}
2376
2377	if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
2378		__db_errx(env,
2379	"DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices");
2380		return (EINVAL);
2381	}
2382
2383	switch (LF_ISSET(DB_OPFLAGS_MASK)) {
2384	case DB_CONSUME:
2385	case DB_CONSUME_WAIT:
2386		/* These flags make no sense on a secondary index. */
2387		return (__db_ferr(env, "DBcursor->pget", 0));
2388	case DB_GET_BOTH:
2389	case DB_GET_BOTH_RANGE:
2390		/* BOTH is "get both the primary and the secondary". */
2391		if (pkey == NULL) {
2392			__db_errx(env,
2393			    "%s requires both a secondary and a primary key",
2394			     LF_ISSET(DB_GET_BOTH) ?
2395			     "DB_GET_BOTH" : "DB_GET_BOTH_RANGE");
2396			return (EINVAL);
2397		}
2398		if ((ret = __dbt_usercopy(env, pkey)) != 0)
2399			return (ret);
2400		break;
2401	default:
2402		/* __dbc_get_arg will catch the rest. */
2403		break;
2404	}
2405
2406	/*
2407	 * We allow the pkey field to be NULL, so that we can make the
2408	 * two-DBT get calls into wrappers for the three-DBT ones.
2409	 */
2410	if (pkey != NULL &&
2411	    (ret = __dbt_ferr(dbp, "primary key", pkey, 0)) != 0)
2412		return (ret);
2413
2414	/* But the pkey field can't be NULL if we're doing a DB_GET_BOTH. */
2415	if (pkey == NULL && (flags & DB_OPFLAGS_MASK) == DB_GET_BOTH) {
2416		__db_errx(env,
2417		    "DB_GET_BOTH on a secondary index requires a primary key");
2418		return (EINVAL);
2419	}
2420	return (0);
2421}
2422
2423/*
2424 * __dbc_put_pp --
2425 *	DBC->put pre/post processing.
2426 *
2427 * PUBLIC: int __dbc_put_pp __P((DBC *, DBT *, DBT *, u_int32_t));
2428 */
2429int
2430__dbc_put_pp(dbc, key, data, flags)
2431	DBC *dbc;
2432	DBT *key, *data;
2433	u_int32_t flags;
2434{
2435	DB *dbp;
2436	DB_THREAD_INFO *ip;
2437	ENV *env;
2438	int ret;
2439
2440	dbp = dbc->dbp;
2441	env = dbp->env;
2442
2443	if ((ret = __dbc_put_arg(dbc, key, data, flags)) != 0)
2444		return (ret);
2445
2446	ENV_ENTER(env, ip);
2447
2448	/* Check for consistent transaction usage. */
2449	if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
2450		goto err;
2451
2452	DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->put",
2453	    flags == DB_KEYFIRST || flags == DB_KEYLAST ||
2454	    flags == DB_NODUPDATA || flags == DB_UPDATE_SECONDARY ?
2455	    key : NULL, data, flags);
2456	ret = __dbc_put(dbc, key, data, flags);
2457
2458err:	ENV_LEAVE(env, ip);
2459	__dbt_userfree(env, key, NULL, data);
2460	return (ret);
2461}
2462
2463/*
2464 * __dbc_put_arg --
2465 *	Check DBC->put arguments.
2466 */
2467static int
2468__dbc_put_arg(dbc, key, data, flags)
2469	DBC *dbc;
2470	DBT *key, *data;
2471	u_int32_t flags;
2472{
2473	DB *dbp;
2474	ENV *env;
2475	int key_flags, ret;
2476
2477	dbp = dbc->dbp;
2478	env = dbp->env;
2479	key_flags = 0;
2480
2481	/* Check for changes to a read-only tree. */
2482	if (DB_IS_READONLY(dbp))
2483		return (__db_rdonly(env, "DBcursor->put"));
2484
2485	/* Check for puts on a secondary. */
2486	if (F_ISSET(dbp, DB_AM_SECONDARY)) {
2487		if (flags == DB_UPDATE_SECONDARY)
2488			flags = DB_KEYLAST;
2489		else {
2490			__db_errx(env,
2491		    "DBcursor->put forbidden on secondary indices");
2492			return (EINVAL);
2493		}
2494	}
2495
2496	if ((ret = __dbt_usercopy(env, data)) != 0)
2497		return (ret);
2498
2499	/* Check for invalid function flags. */
2500	switch (flags) {
2501	case DB_AFTER:
2502	case DB_BEFORE:
2503		switch (dbp->type) {
2504		case DB_BTREE:
2505		case DB_HASH:		/* Only with unsorted duplicates. */
2506			if (!F_ISSET(dbp, DB_AM_DUP))
2507				goto err;
2508			if (dbp->dup_compare != NULL)
2509				goto err;
2510			break;
2511		case DB_QUEUE:		/* Not permitted. */
2512			goto err;
2513		case DB_RECNO:		/* Only with mutable record numbers. */
2514			if (!F_ISSET(dbp, DB_AM_RENUMBER))
2515				goto err;
2516			key_flags = key == NULL ? 0 : 1;
2517			break;
2518		case DB_UNKNOWN:
2519		default:
2520			goto err;
2521		}
2522		break;
2523	case DB_CURRENT:
2524		/*
2525		 * If there is a comparison function, doing a DB_CURRENT
2526		 * must not change the part of the data item that is used
2527		 * for the comparison.
2528		 */
2529		break;
2530	case DB_NODUPDATA:
2531		if (!F_ISSET(dbp, DB_AM_DUPSORT))
2532			goto err;
2533		/* FALLTHROUGH */
2534	case DB_KEYFIRST:
2535	case DB_KEYLAST:
2536		key_flags = 1;
2537		if ((ret = __dbt_usercopy(env, key)) != 0)
2538			return (ret);
2539		break;
2540	default:
2541err:		return (__db_ferr(env, "DBcursor->put", 0));
2542	}
2543
2544	/*
2545	 * Check for invalid key/data flags.  The key may reasonably be NULL
2546	 * if DB_AFTER or DB_BEFORE is set and the application doesn't care
2547	 * about the returned key, or if the DB_CURRENT flag is set.
2548	 */
2549	if (key_flags && (ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
2550		return (ret);
2551	if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
2552		return (ret);
2553
2554	/*
2555	 * The key parameter should not be NULL or have the "partial" flag set
2556	 * in a put call unless the user doesn't care about a key value we'd
2557	 * return.  The user tells us they don't care about the returned key by
2558	 * setting the key parameter to NULL or configuring the key DBT to not
2559	 * return any information.  (Returned keys from a put are always record
2560	 * numbers, and returning part of a record number  doesn't make sense:
2561	 * only accept a partial return if the length returned is 0.)
2562	 */
2563	if (key_flags && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0)
2564		return (__db_ferr(env, "key DBT", 0));
2565
2566	/*
2567	 * The cursor must be initialized for anything other than DB_KEYFIRST
2568	 * and DB_KEYLAST, return EINVAL for an invalid cursor, otherwise 0.
2569	 */
2570	if (!IS_INITIALIZED(dbc) && flags != DB_KEYFIRST &&
2571	    flags != DB_KEYLAST && flags != DB_NODUPDATA)
2572		return (__db_curinval(env));
2573
2574	return (0);
2575}
2576
2577/*
2578 * __dbt_ferr --
2579 *	Check a DBT for flag errors.
2580 */
2581static int
2582__dbt_ferr(dbp, name, dbt, check_thread)
2583	const DB *dbp;
2584	const char *name;
2585	const DBT *dbt;
2586	int check_thread;
2587{
2588	ENV *env;
2589	int ret;
2590
2591	env = dbp->env;
2592
2593	/*
2594	 * Check for invalid DBT flags.  We allow any of the flags to be
2595	 * specified to any DB or DBcursor call so that applications can
2596	 * set DB_DBT_MALLOC when retrieving a data item from a secondary
2597	 * database and then specify that same DBT as a key to a primary
2598	 * database, without having to clear flags.
2599	 */
2600	if ((ret = __db_fchk(env, name, dbt->flags, DB_DBT_APPMALLOC |
2601	    DB_DBT_MALLOC | DB_DBT_DUPOK | DB_DBT_REALLOC |
2602	    DB_DBT_USERCOPY | DB_DBT_USERMEM | DB_DBT_PARTIAL)) != 0)
2603		return (ret);
2604	switch (F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC |
2605	    DB_DBT_USERCOPY | DB_DBT_USERMEM)) {
2606	case 0:
2607	case DB_DBT_MALLOC:
2608	case DB_DBT_REALLOC:
2609	case DB_DBT_USERCOPY:
2610	case DB_DBT_USERMEM:
2611		break;
2612	default:
2613		return (__db_ferr(env, name, 1));
2614	}
2615
2616	if (check_thread && DB_IS_THREADED(dbp) &&
2617	    !F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC |
2618		DB_DBT_USERCOPY | DB_DBT_USERMEM)) {
2619		__db_errx(env,
2620		    "DB_THREAD mandates memory allocation flag on %s DBT",
2621		    name);
2622		return (EINVAL);
2623	}
2624	return (0);
2625}
2626
2627/*
2628 * __db_curinval
2629 *	Report that a cursor is in an invalid state.
2630 */
2631static int
2632__db_curinval(env)
2633	const ENV *env;
2634{
2635	__db_errx(env,
2636	    "Cursor position must be set before performing this operation");
2637	return (EINVAL);
2638}
2639
2640/*
2641 * __db_txn_auto_init --
2642 *	Handle DB_AUTO_COMMIT initialization.
2643 *
2644 * PUBLIC: int __db_txn_auto_init __P((ENV *, DB_THREAD_INFO *, DB_TXN **));
2645 */
2646int
2647__db_txn_auto_init(env, ip, txnidp)
2648	ENV *env;
2649	DB_THREAD_INFO *ip;
2650	DB_TXN **txnidp;
2651{
2652	/*
2653	 * Method calls where applications explicitly specify DB_AUTO_COMMIT
2654	 * require additional validation: the DB_AUTO_COMMIT flag cannot be
2655	 * specified if a transaction cookie is also specified, nor can the
2656	 * flag be specified in a non-transactional environment.
2657	 */
2658	if (*txnidp != NULL) {
2659		__db_errx(env,
2660    "DB_AUTO_COMMIT may not be specified along with a transaction handle");
2661		return (EINVAL);
2662	}
2663
2664	if (!TXN_ON(env)) {
2665		__db_errx(env,
2666    "DB_AUTO_COMMIT may not be specified in non-transactional environment");
2667		return (EINVAL);
2668	}
2669
2670	/*
2671	 * Our caller checked to see if replication is making a state change.
2672	 * Don't call the user-level API (which would repeat that check).
2673	 */
2674	return (__txn_begin(env, ip, NULL, txnidp, 0));
2675}
2676
2677/*
2678 * __db_txn_auto_resolve --
2679 *	Resolve local transactions.
2680 *
2681 * PUBLIC: int __db_txn_auto_resolve __P((ENV *, DB_TXN *, int, int));
2682 */
2683int
2684__db_txn_auto_resolve(env, txn, nosync, ret)
2685	ENV *env;
2686	DB_TXN *txn;
2687	int nosync, ret;
2688{
2689	int t_ret;
2690
2691	/*
2692	 * We're resolving a transaction for the user, and must decrement the
2693	 * replication handle count.  Call the user-level API.
2694	 */
2695	if (ret == 0)
2696		return (__txn_commit(txn, nosync ? DB_TXN_NOSYNC : 0));
2697
2698	if ((t_ret = __txn_abort(txn)) != 0)
2699		return (__env_panic(env, t_ret));
2700
2701	return (ret);
2702}
2703