1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: hash.src,v 12.10 2008/01/08 20:58:33 bostic Exp $
7 */
8/*
9 * Copyright (c) 1995, 1996
10 *	Margo Seltzer.  All rights reserved.
11 */
12/*
13 * Copyright (c) 1995, 1996
14 *	The President and Fellows of Harvard University.  All rights reserved.
15 *
16 * This code is derived from software contributed to Berkeley by
17 * Margo Seltzer.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 * 3. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 */
43
44DBPRIVATE
45PREFIX	__ham
46
47INCLUDE #include "db_int.h"
48INCLUDE #include "dbinc/crypto.h"
49INCLUDE #include "dbinc/db_page.h"
50INCLUDE #include "dbinc/db_dispatch.h"
51INCLUDE #include "dbinc/db_am.h"
52INCLUDE #include "dbinc/hash.h"
53INCLUDE #include "dbinc/log.h"
54INCLUDE #include "dbinc/txn.h"
55INCLUDE
56
57/*
58 * HASH-insdel: used for hash to insert/delete a pair of entries onto a master
59 * page. The pair might be a regular key/data pair, or it might be the
60 * structures that refer to off-page items, duplicates or off-page duplicates.
61 *  opcode - PUTPAIR/DELPAIR + big masks
62 *  fileid - identifies the file referenced
63 *  pgno - page within file
64 *  ndx - index on the page of the item being added or deleted (item index)
65 *  pagelsn - lsn on the page before the update
66 *  key - the key being inserted or deleted
67 *  data - the data being inserted or deleted
68 */
69BEGIN insdel		42	21
70ARG	opcode		u_int32_t	lu
71DB	fileid		int32_t		ld
72ARG	pgno		db_pgno_t	lu
73ARG	ndx		u_int32_t	lu
74POINTER	pagelsn		DB_LSN *	lu
75DBT	key		DBT		s
76DBT	data		DBT		s
77END
78
79/*
80 * Used to add and remove overflow pages.
 * opcode selects the operation being logged.
 *	NOTE(review): presumably distinguishes add from remove; the opcode
 *	values are not defined in this file -- confirm against hash.h.
 * fileid identifies the file referenced.
81 * prev_pgno is the previous page that is going to get modified to
82 *	point to this one.  If this is the first page in a chain
83 *	then prev_pgno should be PGNO_INVALID.
 * prevlsn: NOTE(review): presumably the old lsn on prev_pgno -- confirm.
84 * new_pgno is the page being allocated.
85 * next_pgno is the page that follows this one.  On allocation,
86 *	this should be PGNO_INVALID.  For deletes, it may exist.
 * nextlsn: NOTE(review): presumably the old lsn on next_pgno -- confirm.
87 * pagelsn is the old lsn on the page.
88 */
89BEGIN newpage		42	22
90ARG	opcode		u_int32_t	lu
91DB	fileid		int32_t		ld
92ARG	prev_pgno	db_pgno_t	lu
93POINTER	prevlsn		DB_LSN *	lu
94ARG	new_pgno	db_pgno_t	lu
95POINTER	pagelsn		DB_LSN *	lu
96ARG	next_pgno	db_pgno_t	lu
97POINTER	nextlsn		DB_LSN *	lu
98END
99
100/*
101 * Splitting requires two types of log messages.  The first (the metagroup
 * record, defined later in this file) logs the meta-data aspects of the
 * split.  The second (this record) logs the
102 * data on the original page.  To redo the split, we have to visit the
103 * new page (pages) and add the items back on the page if they are not
104 * yet there.
 *
 *  fileid - identifies the file referenced
 *  opcode - NOTE(review): values are not defined in this file; confirm
 *	against the hash recovery code.
 *  pgno - page within file
 *  pageimage - NOTE(review): presumably the logged contents of page pgno
 *	-- confirm.
 *  pagelsn - NOTE(review): presumably the lsn on the page before the
 *	update, as in the other hash records -- confirm.
105 */
106BEGIN splitdata		42	24
107DB	fileid		int32_t		ld
108ARG	opcode		u_int32_t	lu
109ARG	pgno		db_pgno_t	lu
110PGDBT	pageimage	DBT		s
111POINTER	pagelsn		DB_LSN *	lu
112END
113
114/*
115 * HASH-replace: used for hash to handle partial puts that only
116 * affect a single master page.
117 *  fileid - identifies the file referenced
118 *  pgno - page within file
119 *  ndx - index on the page of the item being modified (item index)
120 *  pagelsn - lsn on the page before the update
121 *  off - offset in the old item where the new item is going
122 *  olditem - DBT that describes the part of the item being replaced
123 *  newitem - DBT of the new item
124 *  makedup - set when this replacement made the item a duplicate
125 */
126BEGIN replace		42	25
127DB	fileid		int32_t		ld
128ARG	pgno		db_pgno_t	lu
129ARG	ndx		u_int32_t	lu
130POINTER	pagelsn		DB_LSN *	lu
131ARG	off		int32_t		ld
132DBT	olditem		DBT		s
133DBT	newitem		DBT		s
134ARG	makedup		u_int32_t	lu
135END
136
137/*
138 * Used when we empty the first page in a bucket and there are pages after
139 * it.  The page after it gets copied into the bucket page (since bucket
140 * pages have to be in fixed locations).
 * fileid: identifies the file referenced
141 * pgno: the bucket page
142 * pagelsn: the old LSN on the bucket page
143 * next_pgno: the page number of the next page
 * nextlsn: NOTE(review): presumably the LSN of next_pgno, by analogy with
 *	nnextlsn -- confirm.
144 * nnext_pgno: page after next_pgno (may need to change its prev)
145 * nnextlsn: the LSN of nnext_pgno.
 * page: NOTE(review): presumably the image of the page being copied into
 *	the bucket page -- confirm against the recovery code.
146 */
147BEGIN copypage		42	28
148DB	fileid		int32_t		ld
149ARG	pgno		db_pgno_t	lu
150POINTER	pagelsn		DB_LSN *	lu
151ARG	next_pgno	db_pgno_t	lu
152POINTER	nextlsn		DB_LSN *	lu
153ARG	nnext_pgno	db_pgno_t	lu
154POINTER	nnextlsn	DB_LSN *	lu
155PGDBT	page		DBT		s
156END
157
158/*
159 * This record logs the meta-data aspects of a split operation.  It has enough
160 * information so that we can record both an individual page allocation as well
161 * as a group allocation, which we do because in sub databases the pages in
162 * a hash doubling must be contiguous.  If we do a group allocation, the
163 * number of pages allocated is bucket + 1, pgno is the page number of the
164 * first newly allocated bucket.
165 *
 * fileid:	Identifies the file referenced.
166 * bucket:	Old maximum bucket number.
167 * mmpgno:	Master meta-data page number (0 if same as mpgno).
168 * mmetalsn:	Lsn of the master meta-data page.
169 * mpgno:	Meta-data page number.
170 * metalsn:	Lsn of the meta-data page.
171 * pgno:	Page allocated to bucket + 1 (first newly allocated page)
172 * pagelsn:	Lsn of either the first page allocated (if newalloc == 0) or
173 *		the last page allocated (if newalloc == 1).
174 * newalloc:	1 indicates that this record did the actual allocation;
175 *		0 indicates that the pages were already allocated from a
176 *		previous (failed) allocation.
177 * last_pgno:	the last page in the file before this op (4.3+).
 *
 * Two layouts are declared for record number 29.  The BEGIN_COMPAT (42)
 * layout has every field above except last_pgno; the BEGIN (43) layout
 * appends last_pgno.
 * NOTE(review): the 42 layout is presumably kept so that logs written
 * before 4.3 can still be read -- confirm against gen_rec.awk.
178 */
179BEGIN_COMPAT metagroup		42	29
180DB	fileid		int32_t		ld
181ARG	bucket		u_int32_t	lu
182ARG	 mmpgno		db_pgno_t	lu
183POINTER	mmetalsn	DB_LSN *	lu
184ARG	mpgno		db_pgno_t	lu
185POINTER	metalsn		DB_LSN *	lu
186ARG	pgno		db_pgno_t	lu
187POINTER	pagelsn		DB_LSN *	lu
188ARG	newalloc	u_int32_t	lu
189END
190
191BEGIN metagroup		43	29
192DB	fileid		int32_t		ld
193ARG	bucket		u_int32_t	lu
194ARG	 mmpgno		db_pgno_t	lu
195POINTER	mmetalsn	DB_LSN *	lu
196ARG	mpgno		db_pgno_t	lu
197POINTER	metalsn		DB_LSN *	lu
198ARG	pgno		db_pgno_t	lu
199POINTER	pagelsn		DB_LSN *	lu
200ARG	newalloc	u_int32_t	lu
201ARG	last_pgno	db_pgno_t	lu
202END
203
204/*
205 * groupalloc
206 *
207 * This is used in conjunction with MPOOL_NEW_GROUP when we are creating
208 * a new database to make sure that we recreate or reclaim free pages
209 * when we allocate a chunk of contiguous ones during database creation.
210 *
 * fileid:	identifies the file referenced
211 * meta_lsn:	meta-data lsn
212 * start_pgno:	starting page number
213 * num:		number of allocated pages
214 * unused:	unused, historically the meta-data free list page number
 *		(the BEGIN_COMPAT (42) layout still names this field "free")
215 * last_pgno:	the last page in the file before this op (4.3+).
 *
 * Two layouts are declared for record number 32.  The BEGIN_COMPAT (42)
 * layout ends with the "free" field and has no last_pgno; the BEGIN (43)
 * layout renames that field to "unused" and appends last_pgno.
 * NOTE(review): the 42 layout is presumably kept so that logs written
 * before 4.3 can still be read -- confirm against gen_rec.awk.
216 */
217BEGIN_COMPAT groupalloc	42	32
218DB	fileid		int32_t		ld
219POINTER	meta_lsn	DB_LSN *	lu
220ARG	start_pgno	db_pgno_t	lu
221ARG	num		u_int32_t	lu
222ARG	free		db_pgno_t	lu
223END
224
225BEGIN groupalloc	43	32
226DB	fileid		int32_t		ld
227POINTER	meta_lsn	DB_LSN *	lu
228ARG	start_pgno	db_pgno_t	lu
229ARG	num		u_int32_t	lu
230ARG	unused		db_pgno_t	lu
231ARG	last_pgno	db_pgno_t	lu
232END
233
234/*
235 * Records for backing out cursor adjustment.
 * Both records also carry fileid, which identifies the file referenced.
 *
236 *   curadj - added or deleted a record or a dup
237 *	within a record.
238 *	pgno	- page that was affected
239 *	indx	- indx of record affected.
240 *	len	- if a dup its length.
241 *	dup_off	- if a dup its offset
242 *	add	- 1 if add 0 if delete
243 *	is_dup  - 1 if dup 0 otherwise.
244 *	order	- order assigned to this deleted record or dup.
245 *
246 *   chgpg - removed a page, move the records to a new page
247 *	mode	- CHGPG page was deleted or records move to new page.
248 *		- SPLIT we split a bucket
249 *		- DUP we converted to off page duplicates.
250 *	old_pgno, new_pgno - old and new page numbers.
251 *	old_indx, new_indx - old and new index numbers, NDX_INVALID if
252 *		it affects all records on the page.
253 *		For three opcodes new in 3.3 (DB_HAM_DELFIRSTPG, DELMIDPG,
254 *		and DELLASTPG), we overload old_indx and new_indx to avoid
255 *		needing a new log record type:  old_indx stores the only
256 *		indx of interest to these records, and new_indx stores the
257 *		order that's assigned to the lowest deleted record we're
258 *		moving.
259 */
260BEGIN curadj	42	33
261DB	fileid		int32_t		ld
262ARG	pgno		db_pgno_t	lu
263ARG	indx		u_int32_t	lu
264ARG	len		u_int32_t	lu
265ARG	dup_off		u_int32_t	lu
266ARG	add		int		ld
267ARG	is_dup		int		ld
268ARG	order		u_int32_t	lu
269END
270
271BEGIN chgpg	42	34
272DB	fileid		int32_t		ld
273ARG	mode		db_ham_mode	ld
274ARG	old_pgno	db_pgno_t	lu
275ARG	new_pgno	db_pgno_t	lu
276ARG	old_indx	u_int32_t	lu
277ARG	new_indx	u_int32_t	lu
278END
279
280