/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996,2008 Oracle. All rights reserved.
 *
 * $Id: hash.src,v 12.10 2008/01/08 20:58:33 bostic Exp $
 */
/*
 * Copyright (c) 1995, 1996
 *	Margo Seltzer. All rights reserved.
 */
/*
 * Copyright (c) 1995, 1996
 *	The President and Fellows of Harvard University. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Margo Seltzer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

DBPRIVATE
PREFIX	__ham

INCLUDE #include "db_int.h"
INCLUDE #include "dbinc/crypto.h"
INCLUDE #include "dbinc/db_page.h"
INCLUDE #include "dbinc/db_dispatch.h"
INCLUDE #include "dbinc/db_am.h"
INCLUDE #include "dbinc/hash.h"
INCLUDE #include "dbinc/log.h"
INCLUDE #include "dbinc/txn.h"
INCLUDE

/*
 * HASH-insdel: used for hash to insert/delete a pair of entries onto a master
 * page. The pair might be regular key/data pairs or they might be the
 * structures that refer to off page items, duplicates or offpage duplicates.
 *  opcode - PUTPAIR/DELPAIR + big masks
 *  fileid - identifies the file referenced
 *  pgno - page within file
 *  ndx - index on the page of the item being added or deleted (item index)
 *  pagelsn - lsn on the page before the update
 *  key - the key being inserted or deleted
 *  data - the data being inserted or deleted
 */
BEGIN insdel		42	21
ARG	opcode		u_int32_t	lu
DB	fileid		int32_t		ld
ARG	pgno		db_pgno_t	lu
ARG	ndx		u_int32_t	lu
POINTER	pagelsn		DB_LSN *	lu
DBT	key		DBT		s
DBT	data		DBT		s
END

/*
 * Used to add and remove overflow pages.
 * prev_pgno is the previous page that is going to get modified to
 *	point to this one. If this is the first page in a chain
 *	then prev_pgno should be PGNO_INVALID.
 * new_pgno is the page being allocated.
 * next_pgno is the page that follows this one. On allocation,
 *	this should be PGNO_INVALID. For deletes, it may exist.
 * pagelsn is the old lsn on the page.
 */
BEGIN newpage		42	22
ARG	opcode		u_int32_t	lu
DB	fileid		int32_t		ld
ARG	prev_pgno	db_pgno_t	lu
POINTER	prevlsn		DB_LSN *	lu
ARG	new_pgno	db_pgno_t	lu
POINTER	pagelsn		DB_LSN *	lu
ARG	next_pgno	db_pgno_t	lu
POINTER	nextlsn		DB_LSN *	lu
END

/*
 * Splitting requires two types of log messages. The first (metagroup,
 * defined later in this file) logs the meta-data aspects of the split.
 * The second (this record) logs the data on the original page. To redo
 * the split, we have to visit the new page (pages) and add the items
 * back on the page if they are not yet there.
 */
BEGIN splitdata		42	24
DB	fileid		int32_t		ld
ARG	opcode		u_int32_t	lu
ARG	pgno		db_pgno_t	lu
PGDBT	pageimage	DBT		s
POINTER	pagelsn		DB_LSN *	lu
END

/*
 * HASH-replace: is used for hash to handle partial puts that only
 * affect a single master page.
 *  fileid - identifies the file referenced
 *  pgno - page within file
 *  ndx - index on the page of the item being modified (item index)
 *  pagelsn - lsn on the page before the update
 *  off - offset in the old item where the new item is going.
 *  olditem - DBT that describes the part of the item being replaced.
 *  newitem - DBT of the new item.
 *  makedup - this was a replacement that made an item a duplicate.
 */
BEGIN replace		42	25
DB	fileid		int32_t		ld
ARG	pgno		db_pgno_t	lu
ARG	ndx		u_int32_t	lu
POINTER	pagelsn		DB_LSN *	lu
ARG	off		int32_t		ld
DBT	olditem		DBT		s
DBT	newitem		DBT		s
ARG	makedup		u_int32_t	lu
END

/*
 * Used when we empty the first page in a bucket and there are pages after
 * it. The page after it gets copied into the bucket page (since bucket
 * pages have to be in fixed locations).
 *	pgno: the bucket page
 *	pagelsn: the old LSN on the bucket page
 *	next_pgno: the page number of the next page
 *	nextlsn: presumably the LSN of next_pgno (by analogy with
 *		nnextlsn) -- confirm against the recovery code.
 *	nnext_pgno: page after next_pgno (may need to change its prev)
 *	nnextlsn: the LSN of nnext_pgno.
 */
BEGIN copypage		42	28
DB	fileid		int32_t		ld
ARG	pgno		db_pgno_t	lu
POINTER	pagelsn		DB_LSN *	lu
ARG	next_pgno	db_pgno_t	lu
POINTER	nextlsn		DB_LSN *	lu
ARG	nnext_pgno	db_pgno_t	lu
POINTER	nnextlsn	DB_LSN *	lu
PGDBT	page		DBT		s
END

/*
 * This record logs the meta-data aspects of a split operation. It has enough
 * information so that we can record both an individual page allocation as well
 * as a group allocation, which we do because in sub databases the pages in
 * a hash doubling must be contiguous. If we do a group allocation, the
 * number of pages allocated is bucket + 1, pgno is the page number of the
 * first newly allocated bucket.
 *
 * bucket:	Old maximum bucket number.
 * mmpgno:	Master meta-data page number (0 if same as mpgno).
 * mmetalsn:	Lsn of the master meta-data page.
 * mpgno:	Meta-data page number.
 * metalsn:	Lsn of the meta-data page.
 * pgno:	Page allocated to bucket + 1 (first newly allocated page)
 * pagelsn:	Lsn of either the first page allocated (if newalloc == 0) or
 *		the last page allocated (if newalloc == 1).
 * newalloc:	1 indicates that this record did the actual allocation;
 *		0 indicates that the pages were already allocated from a
 *		previous (failed) allocation.
 * last_pgno:	the last page in the file before this op (4.3+).
 *
 * The version-42 compat form below is identical except that it has no
 * last_pgno field.
 */
BEGIN_COMPAT metagroup	42	29
DB	fileid		int32_t		ld
ARG	bucket		u_int32_t	lu
ARG	mmpgno		db_pgno_t	lu
POINTER	mmetalsn	DB_LSN *	lu
ARG	mpgno		db_pgno_t	lu
POINTER	metalsn		DB_LSN *	lu
ARG	pgno		db_pgno_t	lu
POINTER	pagelsn		DB_LSN *	lu
ARG	newalloc	u_int32_t	lu
END

BEGIN metagroup		43	29
DB	fileid		int32_t		ld
ARG	bucket		u_int32_t	lu
ARG	mmpgno		db_pgno_t	lu
POINTER	mmetalsn	DB_LSN *	lu
ARG	mpgno		db_pgno_t	lu
POINTER	metalsn		DB_LSN *	lu
ARG	pgno		db_pgno_t	lu
POINTER	pagelsn		DB_LSN *	lu
ARG	newalloc	u_int32_t	lu
ARG	last_pgno	db_pgno_t	lu
END

/*
 * groupalloc
 *
 * This is used in conjunction with MPOOL_NEW_GROUP when we are creating
 * a new database to make sure that we recreate or reclaim free pages
 * when we allocate a chunk of contiguous ones during database creation.
 *
 * meta_lsn:	meta-data lsn
 * start_pgno:	starting page number
 * num:		number of allocated pages
 * unused:	unused, historically the meta-data free list page number
 *		(this slot is named "free" in the version-42 compat record)
 * last_pgno:	the last page in the file before this op (4.3+);
 *		not present in the version-42 compat record.
 */
BEGIN_COMPAT groupalloc	42	32
DB	fileid		int32_t		ld
POINTER	meta_lsn	DB_LSN *	lu
ARG	start_pgno	db_pgno_t	lu
ARG	num		u_int32_t	lu
ARG	free		db_pgno_t	lu
END

BEGIN groupalloc	43	32
DB	fileid		int32_t		ld
POINTER	meta_lsn	DB_LSN *	lu
ARG	start_pgno	db_pgno_t	lu
ARG	num		u_int32_t	lu
ARG	unused		db_pgno_t	lu
ARG	last_pgno	db_pgno_t	lu
END

/*
 * Records for backing out cursor adjustment.
 * curadj - added or deleted a record or a dup
 *	within a record.
 *	pgno	- page that was affected
 *	indx	- indx of record affected.
 *	len	- if a dup its length.
 *	dup_off	- if a dup its offset
 *	add	- 1 if add 0 if delete
 *	is_dup	- 1 if dup 0 otherwise.
 *	order	- order assigned to this deleted record or dup.
 *
 * chgpg - removed a page, move the records to a new page
 *	mode	- CHGPG page was deleted or records move to new page.
 *		- SPLIT we split a bucket
 *		- DUP we converted to off page duplicates.
 *	old_pgno, new_pgno - old and new page numbers.
 *	old_indx, new_indx - old and new index numbers, NDX_INVALID if
 *		it affects all records on the page.
 *		For three opcodes new in 3.3 (DB_HAM_DELFIRSTPG, DELMIDPG,
 *		and DELLASTPG), we overload old_indx and new_indx to avoid
 *		needing a new log record type: old_indx stores the only
 *		indx of interest to these records, and new_indx stores the
 *		order that's assigned to the lowest deleted record we're
 *		moving.
 */
BEGIN curadj		42	33
DB	fileid		int32_t		ld
ARG	pgno		db_pgno_t	lu
ARG	indx		u_int32_t	lu
ARG	len		u_int32_t	lu
ARG	dup_off		u_int32_t	lu
ARG	add		int		ld
ARG	is_dup		int		ld
ARG	order		u_int32_t	lu
END

BEGIN chgpg		42	34
DB	fileid		int32_t		ld
ARG	mode		db_ham_mode	ld
ARG	old_pgno	db_pgno_t	lu
ARG	new_pgno	db_pgno_t	lu
ARG	old_indx	u_int32_t	lu
ARG	new_indx	u_int32_t	lu
END
