1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: hash_stat.c,v 12.23 2008/03/24 17:22:51 mbrey Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/db_page.h"
13#include "dbinc/btree.h"
14#include "dbinc/hash.h"
15#include "dbinc/mp.h"
16
17#ifdef HAVE_STATISTICS
18static int __ham_stat_callback __P((DBC *, PAGE *, void *, int *));
19
20/*
21 * __ham_stat --
22 *	Gather/print the hash statistics
23 *
24 * PUBLIC: int __ham_stat __P((DBC *, void *, u_int32_t));
25 */
26int
27__ham_stat(dbc, spp, flags)
28	DBC *dbc;
29	void *spp;
30	u_int32_t flags;
31{
32	DB *dbp;
33	DB_HASH_STAT *sp;
34	DB_MPOOLFILE *mpf;
35	ENV *env;
36	HASH_CURSOR *hcp;
37	PAGE *h;
38	db_pgno_t pgno;
39	int ret;
40
41	dbp = dbc->dbp;
42	env = dbp->env;
43
44	mpf = dbp->mpf;
45	sp = NULL;
46
47	hcp = (HASH_CURSOR *)dbc->internal;
48
49	if ((ret = __ham_get_meta(dbc)) != 0)
50		goto err;
51
52	/* Allocate and clear the structure. */
53	if ((ret = __os_umalloc(env, sizeof(*sp), &sp)) != 0)
54		goto err;
55	memset(sp, 0, sizeof(*sp));
56	/* Copy the fields that we have. */
57	sp->hash_nkeys = hcp->hdr->dbmeta.key_count;
58	sp->hash_ndata = hcp->hdr->dbmeta.record_count;
59	/*
60	 * Don't take the page number from the meta-data page -- that value is
61	 * only maintained in the primary database, we may have been called on
62	 * a subdatabase.
63	 */
64	if ((ret = __memp_get_last_pgno(dbp->mpf, &pgno)) != 0)
65		goto err;
66	sp->hash_pagecnt = pgno + 1;
67	sp->hash_pagesize = dbp->pgsize;
68	sp->hash_buckets = hcp->hdr->max_bucket + 1;
69	sp->hash_magic = hcp->hdr->dbmeta.magic;
70	sp->hash_version = hcp->hdr->dbmeta.version;
71	sp->hash_metaflags = hcp->hdr->dbmeta.flags;
72	sp->hash_ffactor = hcp->hdr->ffactor;
73
74	if (flags == DB_FAST_STAT)
75		goto done;
76
77	/* Walk the free list, counting pages. */
78	for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free;
79	    pgno != PGNO_INVALID;) {
80		++sp->hash_free;
81
82		if ((ret = __memp_fget(mpf,
83		     &pgno, dbc->thread_info, dbc->txn, 0, &h)) != 0)
84			goto err;
85
86		pgno = h->next_pgno;
87		(void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority);
88	}
89
90	/* Now traverse the rest of the table. */
91	sp->hash_nkeys = 0;
92	sp->hash_ndata = 0;
93	if ((ret = __ham_traverse(dbc,
94	    DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0)
95		goto err;
96
97	if (!F_ISSET(dbp, DB_AM_RDONLY)) {
98		/*
99		 * A transaction is not required for DB->stat, so this update
100		 * can't safely make a copy of the meta page.  We have to
101		 * update in place.
102		 */
103		if ((ret = __ham_dirty_meta(dbc,
104		    (dbc->txn == NULL) ? DB_MPOOL_EDIT : 0)) != 0)
105			goto err;
106		hcp->hdr->dbmeta.key_count = sp->hash_nkeys;
107		hcp->hdr->dbmeta.record_count = sp->hash_ndata;
108	}
109
110done:	if ((ret = __ham_release_meta(dbc)) != 0)
111		goto err;
112
113	*(DB_HASH_STAT **)spp = sp;
114	return (0);
115
116err:	if (sp != NULL)
117		__os_ufree(env, sp);
118
119	if (hcp->hdr != NULL)
120		(void)__ham_release_meta(dbc);
121
122	return (ret);
123}
124
125/*
126 * __ham_stat_print --
127 *	Display hash statistics.
128 *
129 * PUBLIC: int __ham_stat_print __P((DBC *, u_int32_t));
130 */
131int
132__ham_stat_print(dbc, flags)
133	DBC *dbc;
134	u_int32_t flags;
135{
136	static const FN fn[] = {
137		{ DB_HASH_DUP,		"duplicates" },
138		{ DB_HASH_SUBDB,	"multiple-databases" },
139		{ DB_HASH_DUPSORT,	"sorted duplicates" },
140		{ 0,			NULL }
141	};
142	DB *dbp;
143	ENV *env;
144	DB_HASH_STAT *sp;
145	int lorder, ret;
146	const char *s;
147
148	dbp = dbc->dbp;
149	env = dbp->env;
150
151	if ((ret = __ham_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0)
152		return (ret);
153
154	if (LF_ISSET(DB_STAT_ALL)) {
155		__db_msg(env, "%s", DB_GLOBAL(db_line));
156		__db_msg(env, "Default Hash database information:");
157	}
158	__db_msg(env, "%lx\tHash magic number", (u_long)sp->hash_magic);
159	__db_msg(env,
160	    "%lu\tHash version number", (u_long)sp->hash_version);
161	(void)__db_get_lorder(dbp, &lorder);
162	switch (lorder) {
163	case 1234:
164		s = "Little-endian";
165		break;
166	case 4321:
167		s = "Big-endian";
168		break;
169	default:
170		s = "Unrecognized byte order";
171		break;
172	}
173	__db_msg(env, "%s\tByte order", s);
174	__db_prflags(env, NULL, sp->hash_metaflags, fn, NULL, "\tFlags");
175	__db_dl(env,
176	    "Number of pages in the database", (u_long)sp->hash_pagecnt);
177	__db_dl(env,
178	    "Underlying database page size", (u_long)sp->hash_pagesize);
179	__db_dl(env, "Specified fill factor", (u_long)sp->hash_ffactor);
180	__db_dl(env,
181	    "Number of keys in the database", (u_long)sp->hash_nkeys);
182	__db_dl(env,
183	    "Number of data items in the database", (u_long)sp->hash_ndata);
184
185	__db_dl(env, "Number of hash buckets", (u_long)sp->hash_buckets);
186	__db_dl_pct(env, "Number of bytes free on bucket pages",
187	    (u_long)sp->hash_bfree, DB_PCT_PG(
188	    sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff");
189
190	__db_dl(env,
191	    "Number of overflow pages", (u_long)sp->hash_bigpages);
192	__db_dl_pct(env, "Number of bytes free in overflow pages",
193	    (u_long)sp->hash_big_bfree, DB_PCT_PG(
194	    sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff");
195
196	__db_dl(env,
197	    "Number of bucket overflow pages", (u_long)sp->hash_overflows);
198	__db_dl_pct(env,
199	    "Number of bytes free in bucket overflow pages",
200	    (u_long)sp->hash_ovfl_free, DB_PCT_PG(
201	    sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff");
202
203	__db_dl(env, "Number of duplicate pages", (u_long)sp->hash_dup);
204	__db_dl_pct(env, "Number of bytes free in duplicate pages",
205	    (u_long)sp->hash_dup_free, DB_PCT_PG(
206	    sp->hash_dup_free, sp->hash_dup, sp->hash_pagesize), "ff");
207
208	__db_dl(env,
209	    "Number of pages on the free list", (u_long)sp->hash_free);
210
211	__os_ufree(env, sp);
212
213	return (0);
214}
215
216static int
217__ham_stat_callback(dbc, pagep, cookie, putp)
218	DBC *dbc;
219	PAGE *pagep;
220	void *cookie;
221	int *putp;
222{
223	DB *dbp;
224	DB_BTREE_STAT bstat;
225	DB_HASH_STAT *sp;
226	db_indx_t indx, len, off, tlen, top;
227	u_int8_t *hk;
228	int ret;
229
230	*putp = 0;
231	sp = cookie;
232	dbp = dbc->dbp;
233
234	switch (pagep->type) {
235	case P_INVALID:
236		/*
237		 * Hash pages may be wholly zeroed;  this is not a bug.
238		 * Obviously such pages have no data, so we can just proceed.
239		 */
240		break;
241	case P_HASH_UNSORTED:
242	case P_HASH:
243		/*
244		 * We count the buckets and the overflow pages
245		 * separately and tally their bytes separately
246		 * as well.  We need to figure out if this page
247		 * is a bucket.
248		 */
249		if (PREV_PGNO(pagep) == PGNO_INVALID)
250			sp->hash_bfree += P_FREESPACE(dbp, pagep);
251		else {
252			sp->hash_overflows++;
253			sp->hash_ovfl_free += P_FREESPACE(dbp, pagep);
254		}
255		top = NUM_ENT(pagep);
256		/* Correct for on-page duplicates and deleted items. */
257		for (indx = 0; indx < top; indx += P_INDX) {
258			switch (*H_PAIRDATA(dbp, pagep, indx)) {
259			case H_OFFDUP:
260				break;
261			case H_OFFPAGE:
262			case H_KEYDATA:
263				sp->hash_ndata++;
264				break;
265			case H_DUPLICATE:
266				tlen = LEN_HDATA(dbp, pagep, 0, indx);
267				hk = H_PAIRDATA(dbp, pagep, indx);
268				for (off = 0; off < tlen;
269				    off += len + 2 * sizeof(db_indx_t)) {
270					sp->hash_ndata++;
271					memcpy(&len,
272					    HKEYDATA_DATA(hk)
273					    + off, sizeof(db_indx_t));
274				}
275				break;
276			default:
277				return (__db_pgfmt(dbp->env, PGNO(pagep)));
278			}
279		}
280		sp->hash_nkeys += H_NUMPAIRS(pagep);
281		break;
282	case P_IBTREE:
283	case P_IRECNO:
284	case P_LBTREE:
285	case P_LRECNO:
286	case P_LDUP:
287		/*
288		 * These are all btree pages; get a correct
289		 * cookie and call them.  Then add appropriate
290		 * fields into our stat structure.
291		 */
292		memset(&bstat, 0, sizeof(bstat));
293		if ((ret = __bam_stat_callback(dbc, pagep, &bstat, putp)) != 0)
294			return (ret);
295		sp->hash_dup++;
296		sp->hash_dup_free += bstat.bt_leaf_pgfree +
297		    bstat.bt_dup_pgfree + bstat.bt_int_pgfree;
298		sp->hash_ndata += bstat.bt_ndata;
299		break;
300	case P_OVERFLOW:
301		sp->hash_bigpages++;
302		sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep);
303		break;
304	default:
305		return (__db_pgfmt(dbp->env, PGNO(pagep)));
306	}
307
308	return (0);
309}
310
311/*
312 * __ham_print_cursor --
313 *	Display the current cursor.
314 *
315 * PUBLIC: void __ham_print_cursor __P((DBC *));
316 */
317void
318__ham_print_cursor(dbc)
319	DBC *dbc;
320{
321	static const FN fn[] = {
322		{ H_CONTINUE,	"H_CONTINUE" },
323		{ H_DELETED,	"H_DELETED" },
324		{ H_DUPONLY,	"H_DUPONLY" },
325		{ H_EXPAND,	"H_EXPAND" },
326		{ H_ISDUP,	"H_ISDUP" },
327		{ H_NEXT_NODUP,	"H_NEXT_NODUP" },
328		{ H_NOMORE,	"H_NOMORE" },
329		{ H_OK,		"H_OK" },
330		{ 0,		NULL }
331	};
332	ENV *env;
333	HASH_CURSOR *cp;
334
335	env = dbc->env;
336	cp = (HASH_CURSOR *)dbc->internal;
337
338	STAT_ULONG("Bucket traversing", cp->bucket);
339	STAT_ULONG("Bucket locked", cp->lbucket);
340	STAT_ULONG("Duplicate set offset", cp->dup_off);
341	STAT_ULONG("Current duplicate length", cp->dup_len);
342	STAT_ULONG("Total duplicate set length", cp->dup_tlen);
343	STAT_ULONG("Bytes needed for add", cp->seek_size);
344	STAT_ULONG("Page on which we can insert", cp->seek_found_page);
345	STAT_ULONG("Order", cp->order);
346	__db_prflags(env, NULL, cp->flags, fn, NULL, "\tInternal Flags");
347}
348
349#else /* !HAVE_STATISTICS */
350
351int
352__ham_stat(dbc, spp, flags)
353	DBC *dbc;
354	void *spp;
355	u_int32_t flags;
356{
357	COMPQUIET(spp, NULL);
358	COMPQUIET(flags, 0);
359
360	return (__db_stat_not_built(dbc->env));
361}
362#endif
363
364/*
365 * __ham_traverse
366 *	 Traverse an entire hash table.  We use the callback so that we
367 * can use this both for stat collection and for deallocation.
368 *
369 * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t,
370 * PUBLIC:     int (*)(DBC *, PAGE *, void *, int *), void *, int));
371 */
372int
373__ham_traverse(dbc, mode, callback, cookie, look_past_max)
374	DBC *dbc;
375	db_lockmode_t mode;
376	int (*callback) __P((DBC *, PAGE *, void *, int *));
377	void *cookie;
378	int look_past_max;
379{
380	DB *dbp;
381	DBC *opd;
382	DB_MPOOLFILE *mpf;
383	HASH_CURSOR *hcp;
384	HKEYDATA *hk;
385	db_pgno_t pgno, opgno;
386	int did_put, i, ret, t_ret;
387	u_int32_t bucket, spares_entry;
388
389	dbp = dbc->dbp;
390	opd = NULL;
391	mpf = dbp->mpf;
392	hcp = (HASH_CURSOR *)dbc->internal;
393	ret = 0;
394
395	/*
396	 * In a perfect world, we could simply read each page in the file
397	 * and look at its page type to tally the information necessary.
398	 * Unfortunately, the bucket locking that hash tables do to make
399	 * locking easy, makes this a pain in the butt.  We have to traverse
400	 * duplicate, overflow and big pages from the bucket so that we
401	 * don't access anything that isn't properly locked.
402	 *
403	 */
404	for (bucket = 0;; bucket++) {
405		/*
406		 * We put the loop exit condition check here, because
407		 * it made for a really vile extended ?: that made SCO's
408		 * compiler drop core.
409		 *
410		 * If look_past_max is not set, we can stop at max_bucket;
411		 * if it is set, we need to include pages that are part of
412		 * the current doubling but beyond the highest bucket we've
413		 * split into, as well as pages from a "future" doubling
414		 * that may have been created within an aborted
415		 * transaction.  To do this, keep looping (and incrementing
416		 * bucket) until the corresponding spares array entries
417		 * cease to be defined.
418		 */
419		if (look_past_max) {
420			spares_entry = __db_log2(bucket + 1);
421			if (spares_entry >= NCACHED ||
422			    hcp->hdr->spares[spares_entry] == 0)
423				break;
424		} else {
425			if (bucket > hcp->hdr->max_bucket)
426				break;
427		}
428
429		hcp->bucket = bucket;
430		hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket);
431		for (ret = __ham_get_cpage(dbc, mode); ret == 0;
432		    ret = __ham_next_cpage(dbc, pgno)) {
433
434			/*
435			 * If we are cleaning up pages past the max_bucket,
436			 * then they may be on the free list and have their
437			 * next pointers set, but they should be ignored.  In
438			 * fact, we really ought to just skip anybody who is
439			 * not a valid page.
440			 */
441			if (TYPE(hcp->page) == P_INVALID)
442				break;
443			pgno = NEXT_PGNO(hcp->page);
444
445			/*
446			 * Go through each item on the page checking for
447			 * duplicates (in which case we have to count the
448			 * duplicate pages) or big key/data items (in which
449			 * case we have to count those pages).
450			 */
451			for (i = 0; i < NUM_ENT(hcp->page); i++) {
452				hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i);
453				switch (HPAGE_PTYPE(hk)) {
454				case H_OFFDUP:
455					memcpy(&opgno, HOFFDUP_PGNO(hk),
456					    sizeof(db_pgno_t));
457					if ((ret = __dbc_newopd(dbc,
458					    opgno, NULL, &opd)) != 0)
459						return (ret);
460					if ((ret = __bam_traverse(opd,
461					    DB_LOCK_READ, opgno,
462					    callback, cookie))
463					    != 0)
464						goto err;
465					if ((ret = __dbc_close(opd)) != 0)
466						return (ret);
467					opd = NULL;
468					break;
469				case H_OFFPAGE:
470					/*
471					 * We are about to get a big page
472					 * which will use the same spot that
473					 * the current page uses, so we need
474					 * to restore the current page before
475					 * looking at it again.
476					 */
477					memcpy(&opgno, HOFFPAGE_PGNO(hk),
478					    sizeof(db_pgno_t));
479					if ((ret = __db_traverse_big(dbc,
480					    opgno, callback, cookie)) != 0)
481						goto err;
482					break;
483				case H_KEYDATA:
484				case H_DUPLICATE:
485					break;
486				default:
487					ret = __db_unknown_path(
488					    dbp->env, "__ham_traverse");
489					goto err;
490				}
491			}
492
493			/* Call the callback on main pages. */
494			if ((ret = callback(dbc,
495			    hcp->page, cookie, &did_put)) != 0)
496				goto err;
497
498			if (did_put)
499				hcp->page = NULL;
500			if (pgno == PGNO_INVALID)
501				break;
502		}
503		if (ret != 0)
504			goto err;
505
506		if (hcp->page != NULL) {
507			if ((ret = __memp_fput(mpf,
508			    dbc->thread_info, hcp->page, dbc->priority)) != 0)
509				return (ret);
510			hcp->page = NULL;
511		}
512
513	}
514err:	if (opd != NULL &&
515	    (t_ret = __dbc_close(opd)) != 0 && ret == 0)
516		ret = t_ret;
517	return (ret);
518}
519