1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 */ 6/* 7 * Copyright (c) 1990, 1993, 1994, 1995, 1996 8 * Keith Bostic. All rights reserved. 9 */ 10/* 11 * Copyright (c) 1990, 1993, 1994, 1995 12 * The Regents of the University of California. All rights reserved. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Mike Olson. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * $Id: db_ovfl_vrfy.c,v 12.19 2008/04/11 16:29:02 bschmeck Exp $ 42 */ 43 44#include "db_config.h" 45 46#include "db_int.h" 47#include "dbinc/db_page.h" 48#include "dbinc/db_am.h" 49#include "dbinc/db_verify.h" 50#include "dbinc/mp.h" 51 52/* 53 * __db_vrfy_overflow -- 54 * Verify overflow page. 55 * 56 * PUBLIC: int __db_vrfy_overflow __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, 57 * PUBLIC: u_int32_t)); 58 */ 59int 60__db_vrfy_overflow(dbp, vdp, h, pgno, flags) 61 DB *dbp; 62 VRFY_DBINFO *vdp; 63 PAGE *h; 64 db_pgno_t pgno; 65 u_int32_t flags; 66{ 67 VRFY_PAGEINFO *pip; 68 int isbad, ret, t_ret; 69 70 isbad = 0; 71 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 72 return (ret); 73 74 if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { 75 if (ret == DB_VERIFY_BAD) 76 isbad = 1; 77 else 78 goto err; 79 } 80 81 pip->refcount = OV_REF(h); 82 if (pip->refcount < 1) { 83 EPRINT((dbp->env, 84 "Page %lu: overflow page has zero reference count", 85 (u_long)pgno)); 86 isbad = 1; 87 } 88 89 /* Just store for now. */ 90 pip->olen = HOFFSET(h); 91 92err: if ((t_ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0) 93 ret = t_ret; 94 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 95} 96 97/* 98 * __db_vrfy_ovfl_structure -- 99 * Walk a list of overflow pages, avoiding cycles and marking 100 * pages seen. 101 * 102 * PUBLIC: int __db_vrfy_ovfl_structure 103 * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, u_int32_t)); 104 */ 105int 106__db_vrfy_ovfl_structure(dbp, vdp, pgno, tlen, flags) 107 DB *dbp; 108 VRFY_DBINFO *vdp; 109 db_pgno_t pgno; 110 u_int32_t tlen; 111 u_int32_t flags; 112{ 113 DB *pgset; 114 ENV *env; 115 VRFY_PAGEINFO *pip; 116 db_pgno_t next, prev; 117 int isbad, ret, seen_cnt, t_ret; 118 u_int32_t refcount; 119 120 env = dbp->env; 121 pgset = vdp->pgset; 122 DB_ASSERT(env, pgset != NULL); 123 isbad = 0; 124 125 /* This shouldn't happen, but just to be sure. */ 126 if (!IS_VALID_PGNO(pgno)) 127 return (DB_VERIFY_BAD); 128 129 /* 130 * Check the first prev_pgno; it ought to be PGNO_INVALID, 131 * since there's no prev page. 132 */ 133 if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) 134 return (ret); 135 136 /* The refcount is stored on the first overflow page. */ 137 refcount = pip->refcount; 138 139 if (pip->type != P_OVERFLOW) { 140 EPRINT((env, 141 "Page %lu: overflow page of invalid type %lu", 142 (u_long)pgno, (u_long)pip->type)); 143 ret = DB_VERIFY_BAD; 144 goto err; /* Unsafe to continue. */ 145 } 146 147 prev = pip->prev_pgno; 148 if (prev != PGNO_INVALID) { 149 EPRINT((env, 150 "Page %lu: first page in overflow chain has a prev_pgno %lu", 151 (u_long)pgno, (u_long)prev)); 152 isbad = 1; 153 } 154 155 for (;;) { 156 /* 157 * We may have seen this page elsewhere, if the overflow entry 158 * has been promoted to an internal page; we just want to 159 * make sure that each overflow page is seen exactly as many 160 * times as its refcount dictates. 161 * 162 * Note that this code also serves to keep us from looping 163 * infinitely if there's a cycle in an overflow chain. 164 */ 165 if ((ret = __db_vrfy_pgset_get(pgset, 166 vdp->thread_info, pgno, &seen_cnt)) != 0) 167 goto err; 168 if ((u_int32_t)seen_cnt > refcount) { 169 EPRINT((env, 170 "Page %lu: encountered too many times in overflow traversal", 171 (u_long)pgno)); 172 ret = DB_VERIFY_BAD; 173 goto err; 174 } 175 if ((ret = 176 __db_vrfy_pgset_inc(pgset, vdp->thread_info, pgno)) != 0) 177 goto err; 178 179 /* 180 * Each overflow page can be referenced multiple times, 181 * because it's possible for overflow Btree keys to get 182 * promoted to internal pages. We want to make sure that 183 * each page is referenced from a Btree leaf (or Hash data 184 * page, which we consider a "leaf" here) exactly once; if 185 * the parent was a leaf, set a flag to indicate that we've 186 * seen this page in a leaf context. 187 * 188 * If the parent is not a leaf--in which case it's a Btree 189 * internal page--we don't need to bother doing any further 190 * verification, as we'll do it when we hit the leaf (or 191 * complain that we never saw the leaf). Only the first 192 * page in an overflow chain should ever have a refcount 193 * greater than 1, and the combination of the LEAFSEEN check 194 * and the fact that we bail after the first page for 195 * non-leaves should ensure this. 196 * 197 * Note that each "child" of a page, such as an overflow page, 198 * is stored and verified in a structure check exactly once, 199 * so this code does not need to contend with the fact that 200 * overflow chains used as Btree duplicate keys may be 201 * referenced multiply from a single Btree leaf page. 202 */ 203 if (LF_ISSET(DB_ST_OVFL_LEAF)) { 204 if (F_ISSET(pip, VRFY_OVFL_LEAFSEEN)) { 205 EPRINT((env, 206 "Page %lu: overflow page linked twice from leaf or data page", 207 (u_long)pgno)); 208 ret = DB_VERIFY_BAD; 209 goto err; 210 } 211 F_SET(pip, VRFY_OVFL_LEAFSEEN); 212 } 213 214 /* 215 * We want to verify each overflow chain only once, and 216 * although no chain should be linked more than once from a 217 * leaf page, we can't guarantee that it'll be linked that 218 * once if it's linked from an internal page and the key 219 * is gone. 220 * 221 * seen_cnt is the number of times we'd encountered this page 222 * before calling this function. 223 */ 224 if (seen_cnt == 0) { 225 /* 226 * Keep a running tab on how much of the item we've 227 * seen. 228 */ 229 tlen -= pip->olen; 230 231 /* Send the application feedback about our progress. */ 232 if (!LF_ISSET(DB_SALVAGE)) 233 __db_vrfy_struct_feedback(dbp, vdp); 234 } else 235 goto done; 236 237 next = pip->next_pgno; 238 239 /* Are we there yet? */ 240 if (next == PGNO_INVALID) 241 break; 242 243 /* 244 * We've already checked this when we saved it, but just 245 * to be sure... 246 */ 247 if (!IS_VALID_PGNO(next)) { 248 EPRINT((env, 249 "Page %lu: bad next_pgno %lu on overflow page", 250 (u_long)pgno, (u_long)next)); 251 ret = DB_VERIFY_BAD; 252 goto err; 253 } 254 255 if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 || 256 (ret = __db_vrfy_getpageinfo(vdp, next, &pip)) != 0) 257 return (ret); 258 if (pip->prev_pgno != pgno) { 259 EPRINT((env, 260 "Page %lu: bad prev_pgno %lu on overflow page (should be %lu)", 261 (u_long)next, (u_long)pip->prev_pgno, 262 (u_long)pgno)); 263 isbad = 1; 264 /* 265 * It's safe to continue because we have separate 266 * cycle detection. 267 */ 268 } 269 270 pgno = next; 271 } 272 273 if (tlen > 0) { 274 isbad = 1; 275 EPRINT((env, 276 "Page %lu: overflow item incomplete", (u_long)pgno)); 277 } 278 279done: 280err: if ((t_ret = 281 __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) 282 ret = t_ret; 283 return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); 284} 285 286/* 287 * __db_safe_goff -- 288 * Get an overflow item, very carefully, from an untrusted database, 289 * in the context of the salvager. 290 * 291 * PUBLIC: int __db_safe_goff __P((DB *, VRFY_DBINFO *, 292 * PUBLIC: db_pgno_t,DBT *, void *, u_int32_t)); 293 */ 294int 295__db_safe_goff(dbp, vdp, pgno, dbt, buf, flags) 296 DB *dbp; 297 VRFY_DBINFO *vdp; 298 db_pgno_t pgno; 299 DBT *dbt; 300 void *buf; 301 u_int32_t flags; 302{ 303 DB_MPOOLFILE *mpf; 304 PAGE *h; 305 int ret, t_ret; 306 u_int32_t bytesgot, bytes; 307 u_int8_t *src, *dest; 308 309 mpf = dbp->mpf; 310 h = NULL; 311 ret = t_ret = 0; 312 bytesgot = bytes = 0; 313 314 /* 315 * Back up to the start of the overflow chain (if necessary) via the 316 * prev pointer of the overflow page. This guarantees we transverse the 317 * longest possible chains of overflow pages and won't be called again 318 * with a pgno earlier in the chain, stepping on ourselves. 319 */ 320 for (;;) { 321 if ((ret = __memp_fget( 322 mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) 323 return (ret); 324 325 if (PREV_PGNO(h) == PGNO_INVALID || 326 !IS_VALID_PGNO(PREV_PGNO(h))) 327 break; 328 329 pgno = PREV_PGNO(h); 330 331 if ((ret = __memp_fput(mpf, 332 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) 333 return (ret); 334 } 335 if ((ret = __memp_fput( 336 mpf, vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) 337 return (ret); 338 339 h = NULL; 340 341 while ((pgno != PGNO_INVALID) && (IS_VALID_PGNO(pgno))) { 342 /* 343 * Mark that we're looking at this page; if we've seen it 344 * already, quit. 345 */ 346 if ((ret = __db_salvage_markdone(vdp, pgno)) != 0) 347 break; 348 349 if ((ret = __memp_fget(mpf, &pgno, 350 vdp->thread_info, NULL, 0, &h)) != 0) 351 break; 352 353 /* 354 * Make sure it's really an overflow page, unless we're 355 * being aggressive, in which case we pretend it is. 356 */ 357 if (!LF_ISSET(DB_AGGRESSIVE) && TYPE(h) != P_OVERFLOW) { 358 ret = DB_VERIFY_BAD; 359 break; 360 } 361 362 src = (u_int8_t *)h + P_OVERHEAD(dbp); 363 bytes = OV_LEN(h); 364 365 if (bytes + P_OVERHEAD(dbp) > dbp->pgsize) 366 bytes = dbp->pgsize - P_OVERHEAD(dbp); 367 368 /* 369 * We need to realloc buf each time, we don't know how large it 370 * was when passed in. 371 */ 372 if ((ret = __os_realloc(dbp->env, 373 bytesgot + bytes, buf)) != 0) 374 break; 375 376 dest = *(u_int8_t **)buf + bytesgot; 377 bytesgot += bytes; 378 379 memcpy(dest, src, bytes); 380 381 pgno = NEXT_PGNO(h); 382 383 if ((ret = __memp_fput(mpf, 384 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) 385 break; 386 h = NULL; 387 } 388 389 /* 390 * If we're being aggressive, salvage a partial datum if there 391 * was an error somewhere along the way. 392 */ 393 if (ret == 0 || LF_ISSET(DB_AGGRESSIVE)) { 394 dbt->size = bytesgot; 395 dbt->data = *(void **)buf; 396 } 397 398 /* If we broke out on error, don't leave pages pinned. */ 399 if (h != NULL && (t_ret = __memp_fput(mpf, 400 vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0) 401 ret = t_ret; 402 403 return (ret); 404} 405