1/*	$NetBSD: bt_put.c,v 1.22 2016/09/24 21:31:25 christos Exp $	*/
2
3/*-
4 * Copyright (c) 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Mike Olson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#if HAVE_NBTOOL_CONFIG_H
36#include "nbtool_config.h"
37#endif
38
39#include <sys/cdefs.h>
40__RCSID("$NetBSD: bt_put.c,v 1.22 2016/09/24 21:31:25 christos Exp $");
41
42#include "namespace.h"
43#include <sys/types.h>
44
45#include <assert.h>
46#include <errno.h>
47#include <stdio.h>
48#include <stdlib.h>
49#include <string.h>
50
51#include <db.h>
52#include "btree.h"
53
54static EPG *bt_fast(BTREE *, const DBT *, const DBT *, int *);
55
56/*
57 * __BT_PUT -- Add a btree item to the tree.
58 *
59 * Parameters:
60 *	dbp:	pointer to access method
61 *	key:	key
62 *	data:	data
63 *	flag:	R_NOOVERWRITE
64 *
65 * Returns:
66 *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the
67 *	tree and R_NOOVERWRITE specified.
68 */
69int
70__bt_put(const DB *dbp, DBT *key, const DBT *data, u_int flags)
71{
72	BTREE *t;
73	DBT tkey, tdata;
74	EPG *e = NULL; /* pacify gcc */
75	PAGE *h;
76	indx_t idx, nxtindex;
77	pgno_t pg;
78	uint32_t nbytes, temp;
79	int dflags, exact, status;
80	char *dest, db[NOVFLSIZE], kb[NOVFLSIZE];
81
82	t = dbp->internal;
83
84	/* Toss any page pinned across calls. */
85	if (t->bt_pinned != NULL) {
86		mpool_put(t->bt_mp, t->bt_pinned, 0);
87		t->bt_pinned = NULL;
88	}
89
90	/* Check for change to a read-only tree. */
91	if (F_ISSET(t, B_RDONLY)) {
92		errno = EPERM;
93		return (RET_ERROR);
94	}
95
96	switch (flags) {
97	case 0:
98	case R_NOOVERWRITE:
99		break;
100	case R_CURSOR:
101		/*
102		 * If flags is R_CURSOR, put the cursor.  Must already
103		 * have started a scan and not have already deleted it.
104		 */
105		if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
106		    !F_ISSET(&t->bt_cursor,
107		        CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
108			break;
109		/* FALLTHROUGH */
110	default:
111		errno = EINVAL;
112		return (RET_ERROR);
113	}
114
115	/*
116	 * If the key/data pair won't fit on a page, store it on overflow
117	 * pages.  Only put the key on the overflow page if the pair are
118	 * still too big after moving the data to an overflow page.
119	 *
120	 * XXX
121	 * If the insert fails later on, the overflow pages aren't recovered.
122	 */
123	dflags = 0;
124	if (key->size + data->size > t->bt_ovflsize) {
125		if (key->size > t->bt_ovflsize) {
126storekey:		if (__ovfl_put(t, key, &pg) == RET_ERROR)
127				return (RET_ERROR);
128			tkey.data = kb;
129			tkey.size = NOVFLSIZE;
130			memmove(kb, &pg, sizeof(pg));
131			memmove(kb + sizeof(pgno_t),
132			    &key->size, sizeof(uint32_t));
133			dflags |= P_BIGKEY;
134			key = &tkey;
135		}
136		if (key->size + data->size > t->bt_ovflsize) {
137			if (__ovfl_put(t, data, &pg) == RET_ERROR)
138				return (RET_ERROR);
139			tdata.data = db;
140			tdata.size = NOVFLSIZE;
141			memmove(db, &pg, sizeof(pg));
142			_DBFIT(data->size, uint32_t);
143			temp = (uint32_t)data->size;
144			(void)memmove(db + sizeof(pgno_t),
145			    &temp, sizeof(uint32_t));
146			dflags |= P_BIGDATA;
147			data = &tdata;
148		}
149		if (key->size + data->size > t->bt_ovflsize)
150			goto storekey;
151	}
152
153	/* Replace the cursor. */
154	if (flags == R_CURSOR) {
155		if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL)
156			return (RET_ERROR);
157		idx = t->bt_cursor.pg.index;
158		goto delete;
159	}
160
161	/*
162	 * Find the key to delete, or, the location at which to insert.
163	 * Bt_fast and __bt_search both pin the returned page.
164	 */
165	if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL)
166		if ((e = __bt_search(t, key, &exact)) == NULL)
167			return (RET_ERROR);
168	h = e->page;
169	idx = e->index;
170
171	/*
172	 * Add the key/data pair to the tree.  If an identical key is already
173	 * in the tree, and R_NOOVERWRITE is set, an error is returned.  If
174	 * R_NOOVERWRITE is not set, the key is either added (if duplicates are
175	 * permitted) or an error is returned.
176	 */
177	switch (flags) {
178	case R_NOOVERWRITE:
179		if (!exact)
180			break;
181		mpool_put(t->bt_mp, h, 0);
182		return (RET_SPECIAL);
183	default:
184		if (!exact || !F_ISSET(t, B_NODUPS))
185			break;
186		/*
187		 * !!!
188		 * Note, the delete may empty the page, so we need to put a
189		 * new entry into the page immediately.
190		 */
191delete:		if (__bt_dleaf(t, key, h, (u_int)idx) == RET_ERROR) {
192			mpool_put(t->bt_mp, h, 0);
193			return (RET_ERROR);
194		}
195		break;
196	}
197
198	/*
199	 * If not enough room, or the user has put a ceiling on the number of
200	 * keys permitted in the page, split the page.  The split code will
201	 * insert the key and data and unpin the current page.  If inserting
202	 * into the offset array, shift the pointers up.
203	 */
204	nbytes = NBLEAFDBT(key->size, data->size);
205	if ((uint32_t)h->upper - (uint32_t)h->lower < nbytes + sizeof(indx_t)) {
206		if ((status = __bt_split(t, h, key,
207		    data, dflags, nbytes, (u_int)idx)) != RET_SUCCESS)
208			return (status);
209		goto success;
210	}
211
212	if (idx < (nxtindex = NEXTINDEX(h)))
213		memmove(h->linp + idx + 1, h->linp + idx,
214		    (nxtindex - idx) * sizeof(indx_t));
215	h->lower += sizeof(indx_t);
216
217	h->linp[idx] = h->upper -= nbytes;
218	dest = (char *)(void *)h + h->upper;
219	WR_BLEAF(dest, key, data, dflags);
220
221	/* If the cursor is on this page, adjust it as necessary. */
222	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
223	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
224	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= idx)
225		++t->bt_cursor.pg.index;
226
227	if (t->bt_order == NOT) {
228		if (h->nextpg == P_INVALID) {
229			if (idx == NEXTINDEX(h) - 1) {
230				t->bt_order = FORWARD;
231				t->bt_last.index = idx;
232				t->bt_last.pgno = h->pgno;
233			}
234		} else if (h->prevpg == P_INVALID) {
235			if (idx == 0) {
236				t->bt_order = BACK;
237				t->bt_last.index = 0;
238				t->bt_last.pgno = h->pgno;
239			}
240		}
241	}
242
243	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
244
245success:
246	if (flags == R_SETCURSOR)
247		__bt_setcur(t, e->page->pgno, (u_int)e->index);
248
249	F_SET(t, B_MODIFIED);
250	return (RET_SUCCESS);
251}
252
253#ifdef STATISTICS
254unsigned long bt_cache_hit, bt_cache_miss;
255#endif
256
257/*
258 * BT_FAST -- Do a quick check for sorted data.
259 *
260 * Parameters:
261 *	t:	tree
262 *	key:	key to insert
263 *
264 * Returns:
265 * 	EPG for new record or NULL if not found.
266 */
267static EPG *
268bt_fast(BTREE *t, const DBT *key, const DBT *data, int *exactp)
269{
270	PAGE *h;
271	uint32_t nbytes;
272	int cmp;
273
274	if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) {
275		t->bt_order = NOT;
276		return (NULL);
277	}
278	t->bt_cur.page = h;
279	t->bt_cur.index = t->bt_last.index;
280
281	/*
282	 * If won't fit in this page or have too many keys in this page,
283	 * have to search to get split stack.
284	 */
285	nbytes = NBLEAFDBT(key->size, data->size);
286	if ((uint32_t)h->upper - (uint32_t)h->lower < nbytes + sizeof(indx_t))
287		goto miss;
288
289	if (t->bt_order == FORWARD) {
290		if (t->bt_cur.page->nextpg != P_INVALID)
291			goto miss;
292		if (t->bt_cur.index != NEXTINDEX(h) - 1)
293			goto miss;
294		if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0)
295			goto miss;
296		t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index;
297	} else {
298		if (t->bt_cur.page->prevpg != P_INVALID)
299			goto miss;
300		if (t->bt_cur.index != 0)
301			goto miss;
302		if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0)
303			goto miss;
304		t->bt_last.index = 0;
305	}
306	*exactp = cmp == 0;
307#ifdef STATISTICS
308	++bt_cache_hit;
309#endif
310	return (&t->bt_cur);
311
312miss:
313#ifdef STATISTICS
314	++bt_cache_miss;
315#endif
316	t->bt_order = NOT;
317	mpool_put(t->bt_mp, h, 0);
318	return (NULL);
319}
320