1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: hash_stat.c,v 12.23 2008/03/24 17:22:51 mbrey Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/db_page.h" 13#include "dbinc/btree.h" 14#include "dbinc/hash.h" 15#include "dbinc/mp.h" 16 17#ifdef HAVE_STATISTICS 18static int __ham_stat_callback __P((DBC *, PAGE *, void *, int *)); 19 20/* 21 * __ham_stat -- 22 * Gather/print the hash statistics 23 * 24 * PUBLIC: int __ham_stat __P((DBC *, void *, u_int32_t)); 25 */ 26int 27__ham_stat(dbc, spp, flags) 28 DBC *dbc; 29 void *spp; 30 u_int32_t flags; 31{ 32 DB *dbp; 33 DB_HASH_STAT *sp; 34 DB_MPOOLFILE *mpf; 35 ENV *env; 36 HASH_CURSOR *hcp; 37 PAGE *h; 38 db_pgno_t pgno; 39 int ret; 40 41 dbp = dbc->dbp; 42 env = dbp->env; 43 44 mpf = dbp->mpf; 45 sp = NULL; 46 47 hcp = (HASH_CURSOR *)dbc->internal; 48 49 if ((ret = __ham_get_meta(dbc)) != 0) 50 goto err; 51 52 /* Allocate and clear the structure. */ 53 if ((ret = __os_umalloc(env, sizeof(*sp), &sp)) != 0) 54 goto err; 55 memset(sp, 0, sizeof(*sp)); 56 /* Copy the fields that we have. */ 57 sp->hash_nkeys = hcp->hdr->dbmeta.key_count; 58 sp->hash_ndata = hcp->hdr->dbmeta.record_count; 59 /* 60 * Don't take the page number from the meta-data page -- that value is 61 * only maintained in the primary database, we may have been called on 62 * a subdatabase. 63 */ 64 if ((ret = __memp_get_last_pgno(dbp->mpf, &pgno)) != 0) 65 goto err; 66 sp->hash_pagecnt = pgno + 1; 67 sp->hash_pagesize = dbp->pgsize; 68 sp->hash_buckets = hcp->hdr->max_bucket + 1; 69 sp->hash_magic = hcp->hdr->dbmeta.magic; 70 sp->hash_version = hcp->hdr->dbmeta.version; 71 sp->hash_metaflags = hcp->hdr->dbmeta.flags; 72 sp->hash_ffactor = hcp->hdr->ffactor; 73 74 if (flags == DB_FAST_STAT) 75 goto done; 76 77 /* Walk the free list, counting pages. */ 78 for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free; 79 pgno != PGNO_INVALID;) { 80 ++sp->hash_free; 81 82 if ((ret = __memp_fget(mpf, 83 &pgno, dbc->thread_info, dbc->txn, 0, &h)) != 0) 84 goto err; 85 86 pgno = h->next_pgno; 87 (void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority); 88 } 89 90 /* Now traverse the rest of the table. */ 91 sp->hash_nkeys = 0; 92 sp->hash_ndata = 0; 93 if ((ret = __ham_traverse(dbc, 94 DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0) 95 goto err; 96 97 if (!F_ISSET(dbp, DB_AM_RDONLY)) { 98 /* 99 * A transaction is not required for DB->stat, so this update 100 * can't safely make a copy of the meta page. We have to 101 * update in place. 102 */ 103 if ((ret = __ham_dirty_meta(dbc, 104 (dbc->txn == NULL) ? DB_MPOOL_EDIT : 0)) != 0) 105 goto err; 106 hcp->hdr->dbmeta.key_count = sp->hash_nkeys; 107 hcp->hdr->dbmeta.record_count = sp->hash_ndata; 108 } 109 110done: if ((ret = __ham_release_meta(dbc)) != 0) 111 goto err; 112 113 *(DB_HASH_STAT **)spp = sp; 114 return (0); 115 116err: if (sp != NULL) 117 __os_ufree(env, sp); 118 119 if (hcp->hdr != NULL) 120 (void)__ham_release_meta(dbc); 121 122 return (ret); 123} 124 125/* 126 * __ham_stat_print -- 127 * Display hash statistics. 128 * 129 * PUBLIC: int __ham_stat_print __P((DBC *, u_int32_t)); 130 */ 131int 132__ham_stat_print(dbc, flags) 133 DBC *dbc; 134 u_int32_t flags; 135{ 136 static const FN fn[] = { 137 { DB_HASH_DUP, "duplicates" }, 138 { DB_HASH_SUBDB, "multiple-databases" }, 139 { DB_HASH_DUPSORT, "sorted duplicates" }, 140 { 0, NULL } 141 }; 142 DB *dbp; 143 ENV *env; 144 DB_HASH_STAT *sp; 145 int lorder, ret; 146 const char *s; 147 148 dbp = dbc->dbp; 149 env = dbp->env; 150 151 if ((ret = __ham_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0) 152 return (ret); 153 154 if (LF_ISSET(DB_STAT_ALL)) { 155 __db_msg(env, "%s", DB_GLOBAL(db_line)); 156 __db_msg(env, "Default Hash database information:"); 157 } 158 __db_msg(env, "%lx\tHash magic number", (u_long)sp->hash_magic); 159 __db_msg(env, 160 "%lu\tHash version number", (u_long)sp->hash_version); 161 (void)__db_get_lorder(dbp, &lorder); 162 switch (lorder) { 163 case 1234: 164 s = "Little-endian"; 165 break; 166 case 4321: 167 s = "Big-endian"; 168 break; 169 default: 170 s = "Unrecognized byte order"; 171 break; 172 } 173 __db_msg(env, "%s\tByte order", s); 174 __db_prflags(env, NULL, sp->hash_metaflags, fn, NULL, "\tFlags"); 175 __db_dl(env, 176 "Number of pages in the database", (u_long)sp->hash_pagecnt); 177 __db_dl(env, 178 "Underlying database page size", (u_long)sp->hash_pagesize); 179 __db_dl(env, "Specified fill factor", (u_long)sp->hash_ffactor); 180 __db_dl(env, 181 "Number of keys in the database", (u_long)sp->hash_nkeys); 182 __db_dl(env, 183 "Number of data items in the database", (u_long)sp->hash_ndata); 184 185 __db_dl(env, "Number of hash buckets", (u_long)sp->hash_buckets); 186 __db_dl_pct(env, "Number of bytes free on bucket pages", 187 (u_long)sp->hash_bfree, DB_PCT_PG( 188 sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff"); 189 190 __db_dl(env, 191 "Number of overflow pages", (u_long)sp->hash_bigpages); 192 __db_dl_pct(env, "Number of bytes free in overflow pages", 193 (u_long)sp->hash_big_bfree, DB_PCT_PG( 194 sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff"); 195 196 __db_dl(env, 197 "Number of bucket overflow pages", (u_long)sp->hash_overflows); 198 __db_dl_pct(env, 199 "Number of bytes free in bucket overflow pages", 200 (u_long)sp->hash_ovfl_free, DB_PCT_PG( 201 sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff"); 202 203 __db_dl(env, "Number of duplicate pages", (u_long)sp->hash_dup); 204 __db_dl_pct(env, "Number of bytes free in duplicate pages", 205 (u_long)sp->hash_dup_free, DB_PCT_PG( 206 sp->hash_dup_free, sp->hash_dup, sp->hash_pagesize), "ff"); 207 208 __db_dl(env, 209 "Number of pages on the free list", (u_long)sp->hash_free); 210 211 __os_ufree(env, sp); 212 213 return (0); 214} 215 216static int 217__ham_stat_callback(dbc, pagep, cookie, putp) 218 DBC *dbc; 219 PAGE *pagep; 220 void *cookie; 221 int *putp; 222{ 223 DB *dbp; 224 DB_BTREE_STAT bstat; 225 DB_HASH_STAT *sp; 226 db_indx_t indx, len, off, tlen, top; 227 u_int8_t *hk; 228 int ret; 229 230 *putp = 0; 231 sp = cookie; 232 dbp = dbc->dbp; 233 234 switch (pagep->type) { 235 case P_INVALID: 236 /* 237 * Hash pages may be wholly zeroed; this is not a bug. 238 * Obviously such pages have no data, so we can just proceed. 239 */ 240 break; 241 case P_HASH_UNSORTED: 242 case P_HASH: 243 /* 244 * We count the buckets and the overflow pages 245 * separately and tally their bytes separately 246 * as well. We need to figure out if this page 247 * is a bucket. 248 */ 249 if (PREV_PGNO(pagep) == PGNO_INVALID) 250 sp->hash_bfree += P_FREESPACE(dbp, pagep); 251 else { 252 sp->hash_overflows++; 253 sp->hash_ovfl_free += P_FREESPACE(dbp, pagep); 254 } 255 top = NUM_ENT(pagep); 256 /* Correct for on-page duplicates and deleted items. */ 257 for (indx = 0; indx < top; indx += P_INDX) { 258 switch (*H_PAIRDATA(dbp, pagep, indx)) { 259 case H_OFFDUP: 260 break; 261 case H_OFFPAGE: 262 case H_KEYDATA: 263 sp->hash_ndata++; 264 break; 265 case H_DUPLICATE: 266 tlen = LEN_HDATA(dbp, pagep, 0, indx); 267 hk = H_PAIRDATA(dbp, pagep, indx); 268 for (off = 0; off < tlen; 269 off += len + 2 * sizeof(db_indx_t)) { 270 sp->hash_ndata++; 271 memcpy(&len, 272 HKEYDATA_DATA(hk) 273 + off, sizeof(db_indx_t)); 274 } 275 break; 276 default: 277 return (__db_pgfmt(dbp->env, PGNO(pagep))); 278 } 279 } 280 sp->hash_nkeys += H_NUMPAIRS(pagep); 281 break; 282 case P_IBTREE: 283 case P_IRECNO: 284 case P_LBTREE: 285 case P_LRECNO: 286 case P_LDUP: 287 /* 288 * These are all btree pages; get a correct 289 * cookie and call them. Then add appropriate 290 * fields into our stat structure. 291 */ 292 memset(&bstat, 0, sizeof(bstat)); 293 if ((ret = __bam_stat_callback(dbc, pagep, &bstat, putp)) != 0) 294 return (ret); 295 sp->hash_dup++; 296 sp->hash_dup_free += bstat.bt_leaf_pgfree + 297 bstat.bt_dup_pgfree + bstat.bt_int_pgfree; 298 sp->hash_ndata += bstat.bt_ndata; 299 break; 300 case P_OVERFLOW: 301 sp->hash_bigpages++; 302 sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep); 303 break; 304 default: 305 return (__db_pgfmt(dbp->env, PGNO(pagep))); 306 } 307 308 return (0); 309} 310 311/* 312 * __ham_print_cursor -- 313 * Display the current cursor. 314 * 315 * PUBLIC: void __ham_print_cursor __P((DBC *)); 316 */ 317void 318__ham_print_cursor(dbc) 319 DBC *dbc; 320{ 321 static const FN fn[] = { 322 { H_CONTINUE, "H_CONTINUE" }, 323 { H_DELETED, "H_DELETED" }, 324 { H_DUPONLY, "H_DUPONLY" }, 325 { H_EXPAND, "H_EXPAND" }, 326 { H_ISDUP, "H_ISDUP" }, 327 { H_NEXT_NODUP, "H_NEXT_NODUP" }, 328 { H_NOMORE, "H_NOMORE" }, 329 { H_OK, "H_OK" }, 330 { 0, NULL } 331 }; 332 ENV *env; 333 HASH_CURSOR *cp; 334 335 env = dbc->env; 336 cp = (HASH_CURSOR *)dbc->internal; 337 338 STAT_ULONG("Bucket traversing", cp->bucket); 339 STAT_ULONG("Bucket locked", cp->lbucket); 340 STAT_ULONG("Duplicate set offset", cp->dup_off); 341 STAT_ULONG("Current duplicate length", cp->dup_len); 342 STAT_ULONG("Total duplicate set length", cp->dup_tlen); 343 STAT_ULONG("Bytes needed for add", cp->seek_size); 344 STAT_ULONG("Page on which we can insert", cp->seek_found_page); 345 STAT_ULONG("Order", cp->order); 346 __db_prflags(env, NULL, cp->flags, fn, NULL, "\tInternal Flags"); 347} 348 349#else /* !HAVE_STATISTICS */ 350 351int 352__ham_stat(dbc, spp, flags) 353 DBC *dbc; 354 void *spp; 355 u_int32_t flags; 356{ 357 COMPQUIET(spp, NULL); 358 COMPQUIET(flags, 0); 359 360 return (__db_stat_not_built(dbc->env)); 361} 362#endif 363 364/* 365 * __ham_traverse 366 * Traverse an entire hash table. We use the callback so that we 367 * can use this both for stat collection and for deallocation. 368 * 369 * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t, 370 * PUBLIC: int (*)(DBC *, PAGE *, void *, int *), void *, int)); 371 */ 372int 373__ham_traverse(dbc, mode, callback, cookie, look_past_max) 374 DBC *dbc; 375 db_lockmode_t mode; 376 int (*callback) __P((DBC *, PAGE *, void *, int *)); 377 void *cookie; 378 int look_past_max; 379{ 380 DB *dbp; 381 DBC *opd; 382 DB_MPOOLFILE *mpf; 383 HASH_CURSOR *hcp; 384 HKEYDATA *hk; 385 db_pgno_t pgno, opgno; 386 int did_put, i, ret, t_ret; 387 u_int32_t bucket, spares_entry; 388 389 dbp = dbc->dbp; 390 opd = NULL; 391 mpf = dbp->mpf; 392 hcp = (HASH_CURSOR *)dbc->internal; 393 ret = 0; 394 395 /* 396 * In a perfect world, we could simply read each page in the file 397 * and look at its page type to tally the information necessary. 398 * Unfortunately, the bucket locking that hash tables do to make 399 * locking easy, makes this a pain in the butt. We have to traverse 400 * duplicate, overflow and big pages from the bucket so that we 401 * don't access anything that isn't properly locked. 402 * 403 */ 404 for (bucket = 0;; bucket++) { 405 /* 406 * We put the loop exit condition check here, because 407 * it made for a really vile extended ?: that made SCO's 408 * compiler drop core. 409 * 410 * If look_past_max is not set, we can stop at max_bucket; 411 * if it is set, we need to include pages that are part of 412 * the current doubling but beyond the highest bucket we've 413 * split into, as well as pages from a "future" doubling 414 * that may have been created within an aborted 415 * transaction. To do this, keep looping (and incrementing 416 * bucket) until the corresponding spares array entries 417 * cease to be defined. 418 */ 419 if (look_past_max) { 420 spares_entry = __db_log2(bucket + 1); 421 if (spares_entry >= NCACHED || 422 hcp->hdr->spares[spares_entry] == 0) 423 break; 424 } else { 425 if (bucket > hcp->hdr->max_bucket) 426 break; 427 } 428 429 hcp->bucket = bucket; 430 hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket); 431 for (ret = __ham_get_cpage(dbc, mode); ret == 0; 432 ret = __ham_next_cpage(dbc, pgno)) { 433 434 /* 435 * If we are cleaning up pages past the max_bucket, 436 * then they may be on the free list and have their 437 * next pointers set, but they should be ignored. In 438 * fact, we really ought to just skip anybody who is 439 * not a valid page. 440 */ 441 if (TYPE(hcp->page) == P_INVALID) 442 break; 443 pgno = NEXT_PGNO(hcp->page); 444 445 /* 446 * Go through each item on the page checking for 447 * duplicates (in which case we have to count the 448 * duplicate pages) or big key/data items (in which 449 * case we have to count those pages). 450 */ 451 for (i = 0; i < NUM_ENT(hcp->page); i++) { 452 hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i); 453 switch (HPAGE_PTYPE(hk)) { 454 case H_OFFDUP: 455 memcpy(&opgno, HOFFDUP_PGNO(hk), 456 sizeof(db_pgno_t)); 457 if ((ret = __dbc_newopd(dbc, 458 opgno, NULL, &opd)) != 0) 459 return (ret); 460 if ((ret = __bam_traverse(opd, 461 DB_LOCK_READ, opgno, 462 callback, cookie)) 463 != 0) 464 goto err; 465 if ((ret = __dbc_close(opd)) != 0) 466 return (ret); 467 opd = NULL; 468 break; 469 case H_OFFPAGE: 470 /* 471 * We are about to get a big page 472 * which will use the same spot that 473 * the current page uses, so we need 474 * to restore the current page before 475 * looking at it again. 476 */ 477 memcpy(&opgno, HOFFPAGE_PGNO(hk), 478 sizeof(db_pgno_t)); 479 if ((ret = __db_traverse_big(dbc, 480 opgno, callback, cookie)) != 0) 481 goto err; 482 break; 483 case H_KEYDATA: 484 case H_DUPLICATE: 485 break; 486 default: 487 ret = __db_unknown_path( 488 dbp->env, "__ham_traverse"); 489 goto err; 490 } 491 } 492 493 /* Call the callback on main pages. */ 494 if ((ret = callback(dbc, 495 hcp->page, cookie, &did_put)) != 0) 496 goto err; 497 498 if (did_put) 499 hcp->page = NULL; 500 if (pgno == PGNO_INVALID) 501 break; 502 } 503 if (ret != 0) 504 goto err; 505 506 if (hcp->page != NULL) { 507 if ((ret = __memp_fput(mpf, 508 dbc->thread_info, hcp->page, dbc->priority)) != 0) 509 return (ret); 510 hcp->page = NULL; 511 } 512 513 } 514err: if (opd != NULL && 515 (t_ret = __dbc_close(opd)) != 0 && ret == 0) 516 ret = t_ret; 517 return (ret); 518} 519